linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: tip-bot for Namhyung Kim <tipbot@zytor.com>
To: linux-tip-commits@vger.kernel.org
Cc: acme@redhat.com, linux-kernel@vger.kernel.org, paulus@samba.org,
	hpa@zytor.com, mingo@kernel.org, a.p.zijlstra@chello.nl,
	torvalds@linux-foundation.org, namhyung.kim@lge.com,
	namhyung@kernel.org, jolsa@redhat.com, fweisbec@gmail.com,
	tglx@linutronix.de
Subject: [tip:perf/core] perf callchain: Convert children list to rbtree
Date: Wed, 23 Oct 2013 00:54:48 -0700	[thread overview]
Message-ID: <tip-e369517ce5f796945c6af047b4e8b1d650e03458@git.kernel.org> (raw)
In-Reply-To: <1381468543-25334-2-git-send-email-namhyung@kernel.org>

Commit-ID:  e369517ce5f796945c6af047b4e8b1d650e03458
Gitweb:     http://git.kernel.org/tip/e369517ce5f796945c6af047b4e8b1d650e03458
Author:     Namhyung Kim <namhyung.kim@lge.com>
AuthorDate: Fri, 11 Oct 2013 14:15:36 +0900
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Mon, 21 Oct 2013 17:33:23 -0300

perf callchain: Convert children list to rbtree

Current collapse stage has a scalability problem which can be reproduced
easily with a parallel kernel build.

This is because it needs to traverse every children of callchains
linearly during the collapse/merge stage.

Converting it to a rbtree reduced the overhead significantly.

On my 400MB perf.data file which recorded with make -j32 kernel build:

  $ time perf --no-pager report --stdio > /dev/null

before:
  real	6m22.073s
  user	6m18.683s
  sys	0m0.706s

after:
  real	0m20.780s
  user	0m19.962s
  sys	0m0.689s

During the perf report the overhead on append_chain_children went down
from 96.69% to 18.16%:

  -  18.16%  perf  perf                [.] append_chain_children
     - append_chain_children
        - 77.48% append_chain_children
           + 69.79% merge_chain_branch
           - 22.96% append_chain_children
              + 67.44% merge_chain_branch
              + 30.15% append_chain_children
              + 2.41% callchain_append
           + 7.25% callchain_append
        + 12.26% callchain_append
        + 10.22% merge_chain_branch
  +  11.58%  perf  perf                [.] dso__find_symbol
  +   8.02%  perf  perf                [.] sort__comm_cmp
  +   5.48%  perf  libc-2.17.so        [.] malloc_consolidate

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1381468543-25334-2-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/callchain.c | 147 +++++++++++++++++++++++++++++++++-----------
 tools/perf/util/callchain.h |  11 ++--
 2 files changed, 116 insertions(+), 42 deletions(-)

diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 482f680..e3970e3 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -21,12 +21,6 @@
 
 __thread struct callchain_cursor callchain_cursor;
 
-#define chain_for_each_child(child, parent)	\
-	list_for_each_entry(child, &parent->children, siblings)
-
-#define chain_for_each_child_safe(child, next, parent)	\
-	list_for_each_entry_safe(child, next, &parent->children, siblings)
-
 static void
 rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
 		    enum chain_mode mode)
@@ -71,10 +65,16 @@ static void
 __sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node,
 		  u64 min_hit)
 {
+	struct rb_node *n;
 	struct callchain_node *child;
 
-	chain_for_each_child(child, node)
+	n = rb_first(&node->rb_root_in);
+	while (n) {
+		child = rb_entry(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+
 		__sort_chain_flat(rb_root, child, min_hit);
+	}
 
 	if (node->hit && node->hit >= min_hit)
 		rb_insert_callchain(rb_root, node, CHAIN_FLAT);
@@ -94,11 +94,16 @@ sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root,
 static void __sort_chain_graph_abs(struct callchain_node *node,
 				   u64 min_hit)
 {
+	struct rb_node *n;
 	struct callchain_node *child;
 
 	node->rb_root = RB_ROOT;
+	n = rb_first(&node->rb_root_in);
+
+	while (n) {
+		child = rb_entry(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
 
-	chain_for_each_child(child, node) {
 		__sort_chain_graph_abs(child, min_hit);
 		if (callchain_cumul_hits(child) >= min_hit)
 			rb_insert_callchain(&node->rb_root, child,
@@ -117,13 +122,18 @@ sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root,
 static void __sort_chain_graph_rel(struct callchain_node *node,
 				   double min_percent)
 {
+	struct rb_node *n;
 	struct callchain_node *child;
 	u64 min_hit;
 
 	node->rb_root = RB_ROOT;
 	min_hit = ceil(node->children_hit * min_percent);
 
-	chain_for_each_child(child, node) {
+	n = rb_first(&node->rb_root_in);
+	while (n) {
+		child = rb_entry(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+
 		__sort_chain_graph_rel(child, min_percent);
 		if (callchain_cumul_hits(child) >= min_hit)
 			rb_insert_callchain(&node->rb_root, child,
@@ -173,19 +183,26 @@ create_child(struct callchain_node *parent, bool inherit_children)
 		return NULL;
 	}
 	new->parent = parent;
-	INIT_LIST_HEAD(&new->children);
 	INIT_LIST_HEAD(&new->val);
 
 	if (inherit_children) {
-		struct callchain_node *next;
+		struct rb_node *n;
+		struct callchain_node *child;
+
+		new->rb_root_in = parent->rb_root_in;
+		parent->rb_root_in = RB_ROOT;
 
-		list_splice(&parent->children, &new->children);
-		INIT_LIST_HEAD(&parent->children);
+		n = rb_first(&new->rb_root_in);
+		while (n) {
+			child = rb_entry(n, struct callchain_node, rb_node_in);
+			child->parent = new;
+			n = rb_next(n);
+		}
 
-		chain_for_each_child(next, new)
-			next->parent = new;
+		/* make it the first child */
+		rb_link_node(&new->rb_node_in, NULL, &parent->rb_root_in.rb_node);
+		rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
 	}
-	list_add_tail(&new->siblings, &parent->children);
 
 	return new;
 }
@@ -223,7 +240,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
 	}
 }
 
-static void
+static struct callchain_node *
 add_child(struct callchain_node *parent,
 	  struct callchain_cursor *cursor,
 	  u64 period)
@@ -235,6 +252,19 @@ add_child(struct callchain_node *parent,
 
 	new->children_hit = 0;
 	new->hit = period;
+	return new;
+}
+
+static s64 match_chain(struct callchain_cursor_node *node,
+		      struct callchain_list *cnode)
+{
+	struct symbol *sym = node->sym;
+
+	if (cnode->ms.sym && sym &&
+	    callchain_param.key == CCKEY_FUNCTION)
+		return cnode->ms.sym->start - sym->start;
+	else
+		return cnode->ip - node->ip;
 }
 
 /*
@@ -272,9 +302,33 @@ split_add_child(struct callchain_node *parent,
 
 	/* create a new child for the new branch if any */
 	if (idx_total < cursor->nr) {
+		struct callchain_node *first;
+		struct callchain_list *cnode;
+		struct callchain_cursor_node *node;
+		struct rb_node *p, **pp;
+
 		parent->hit = 0;
-		add_child(parent, cursor, period);
 		parent->children_hit += period;
+
+		node = callchain_cursor_current(cursor);
+		new = add_child(parent, cursor, period);
+
+		/*
+		 * This is second child since we moved parent's children
+		 * to new (first) child above.
+		 */
+		p = parent->rb_root_in.rb_node;
+		first = rb_entry(p, struct callchain_node, rb_node_in);
+		cnode = list_first_entry(&first->val, struct callchain_list,
+					 list);
+
+		if (match_chain(node, cnode) < 0)
+			pp = &p->rb_left;
+		else
+			pp = &p->rb_right;
+
+		rb_link_node(&new->rb_node_in, p, pp);
+		rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
 	} else {
 		parent->hit = period;
 	}
@@ -291,16 +345,40 @@ append_chain_children(struct callchain_node *root,
 		      u64 period)
 {
 	struct callchain_node *rnode;
+	struct callchain_cursor_node *node;
+	struct rb_node **p = &root->rb_root_in.rb_node;
+	struct rb_node *parent = NULL;
+
+	node = callchain_cursor_current(cursor);
+	if (!node)
+		return;
 
 	/* lookup in childrens */
-	chain_for_each_child(rnode, root) {
-		unsigned int ret = append_chain(rnode, cursor, period);
+	while (*p) {
+		s64 ret;
+		struct callchain_list *cnode;
 
-		if (!ret)
+		parent = *p;
+		rnode = rb_entry(parent, struct callchain_node, rb_node_in);
+		cnode = list_first_entry(&rnode->val, struct callchain_list,
+					 list);
+
+		/* just check first entry */
+		ret = match_chain(node, cnode);
+		if (ret == 0) {
+			append_chain(rnode, cursor, period);
 			goto inc_children_hit;
+		}
+
+		if (ret < 0)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
 	}
 	/* nothing in children, add to the current node */
-	add_child(root, cursor, period);
+	rnode = add_child(root, cursor, period);
+	rb_link_node(&rnode->rb_node_in, parent, p);
+	rb_insert_color(&rnode->rb_node_in, &root->rb_root_in);
 
 inc_children_hit:
 	root->children_hit += period;
@@ -325,28 +403,20 @@ append_chain(struct callchain_node *root,
 	 */
 	list_for_each_entry(cnode, &root->val, list) {
 		struct callchain_cursor_node *node;
-		struct symbol *sym;
 
 		node = callchain_cursor_current(cursor);
 		if (!node)
 			break;
 
-		sym = node->sym;
-
-		if (cnode->ms.sym && sym &&
-		    callchain_param.key == CCKEY_FUNCTION) {
-			if (cnode->ms.sym->start != sym->start)
-				break;
-		} else if (cnode->ip != node->ip)
+		if (match_chain(node, cnode) != 0)
 			break;
 
-		if (!found)
-			found = true;
+		found = true;
 
 		callchain_cursor_advance(cursor);
 	}
 
-	/* matches not, relay on the parent */
+	/* matches not, relay no the parent */
 	if (!found) {
 		cursor->curr = curr_snap;
 		cursor->pos = start;
@@ -395,8 +465,9 @@ merge_chain_branch(struct callchain_cursor *cursor,
 		   struct callchain_node *dst, struct callchain_node *src)
 {
 	struct callchain_cursor_node **old_last = cursor->last;
-	struct callchain_node *child, *next_child;
+	struct callchain_node *child;
 	struct callchain_list *list, *next_list;
+	struct rb_node *n;
 	int old_pos = cursor->nr;
 	int err = 0;
 
@@ -412,12 +483,16 @@ merge_chain_branch(struct callchain_cursor *cursor,
 		append_chain_children(dst, cursor, src->hit);
 	}
 
-	chain_for_each_child_safe(child, next_child, src) {
+	n = rb_first(&src->rb_root_in);
+	while (n) {
+		child = container_of(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+		rb_erase(&child->rb_node_in, &src->rb_root_in);
+
 		err = merge_chain_branch(cursor, dst, child);
 		if (err)
 			break;
 
-		list_del(&child->siblings);
 		free(child);
 	}
 
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 2b585bc..7bb3602 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -21,11 +21,11 @@ enum chain_order {
 
 struct callchain_node {
 	struct callchain_node	*parent;
-	struct list_head	siblings;
-	struct list_head	children;
 	struct list_head	val;
-	struct rb_node		rb_node; /* to sort nodes in an rbtree */
-	struct rb_root		rb_root; /* sorted tree of children */
+	struct rb_node		rb_node_in; /* to insert nodes in an rbtree */
+	struct rb_node		rb_node;    /* to sort nodes in an output tree */
+	struct rb_root		rb_root_in; /* input tree of children */
+	struct rb_root		rb_root;    /* sorted output tree of children */
 	unsigned int		val_nr;
 	u64			hit;
 	u64			children_hit;
@@ -86,13 +86,12 @@ extern __thread struct callchain_cursor callchain_cursor;
 
 static inline void callchain_init(struct callchain_root *root)
 {
-	INIT_LIST_HEAD(&root->node.siblings);
-	INIT_LIST_HEAD(&root->node.children);
 	INIT_LIST_HEAD(&root->node.val);
 
 	root->node.parent = NULL;
 	root->node.hit = 0;
 	root->node.children_hit = 0;
+	root->node.rb_root_in = RB_ROOT;
 	root->max_depth = 0;
 }
 

  reply	other threads:[~2013-10-23  7:55 UTC|newest]

Thread overview: 88+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-10-11  5:15 [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Namhyung Kim
2013-10-11  5:15 ` [PATCH 1/8] perf callchain: Convert children list to rbtree Namhyung Kim
2013-10-23  7:54   ` tip-bot for Namhyung Kim [this message]
2013-10-23 11:07     ` [tip:perf/core] " Frederic Weisbecker
2013-10-23 12:45       ` Arnaldo Carvalho de Melo
2013-10-11  5:15 ` [PATCH 2/8] perf ui/progress: Add new helper functions for progress bar Namhyung Kim
2013-10-21 18:13   ` Arnaldo Carvalho de Melo
2013-10-22 18:12     ` Namhyung Kim
2013-10-22 13:06       ` Arnaldo Carvalho de Melo
2013-10-11  5:15 ` [PATCH 3/8] perf tools: Show progress on histogram collapsing Namhyung Kim
2013-10-25 10:33   ` [tip:perf/core] " tip-bot for Namhyung Kim
2013-10-28 15:03   ` [tip:perf/urgent] perf script python: Fix mem leak due to missing Py_DECREFs on dict entries tip-bot for Joseph Schuchart
2013-10-11  5:15 ` [PATCH 4/8] perf tools: Use an accessor to read thread comm Namhyung Kim
2013-10-11  5:15 ` [PATCH 5/8] perf tools: Add time argument on comm setting Namhyung Kim
2013-10-11  5:15 ` [PATCH 6/8] perf tools: Add new comm infrastructure Namhyung Kim
2013-10-25 10:56   ` Frederic Weisbecker
2013-10-25 13:04     ` Arnaldo Carvalho de Melo
2013-10-25 15:33       ` David Ahern
2013-10-25 18:12         ` Frederic Weisbecker
2013-10-25 18:14           ` Arnaldo Carvalho de Melo
2013-10-25 18:19           ` David Ahern
2013-10-28  5:38             ` Namhyung Kim
2013-10-28  9:09               ` Frederic Weisbecker
2013-10-28  9:15                 ` Namhyung Kim
2013-10-28 10:12                   ` Frederic Weisbecker
2013-10-28 12:43                     ` Arnaldo Carvalho de Melo
2013-10-28 14:29                       ` Arnaldo Carvalho de Melo
2013-10-28 16:05                         ` Frederic Weisbecker
2013-10-28 17:01                           ` Arnaldo Carvalho de Melo
2013-10-28 17:48                             ` Arnaldo Carvalho de Melo
2013-10-29  9:20                               ` Frederic Weisbecker
2013-10-29 13:06                                 ` Arnaldo Carvalho de Melo
2013-10-11  5:15 ` [PATCH 7/8] perf tools: Compare hists comm by addresses Namhyung Kim
2013-11-04 20:19   ` [tip:perf/core] " tip-bot for Frederic Weisbecker
2013-10-11  5:15 ` [PATCH 8/8] perf tools: Get current comm instead of last one Namhyung Kim
2013-10-11  5:58 ` [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Ingo Molnar
2013-10-11  7:34   ` Jiri Olsa
2013-10-11  8:24     ` Namhyung Kim
2013-10-11 12:59       ` Ingo Molnar
2013-10-11 13:04         ` Peter Zijlstra
2013-10-11 15:11     ` David Ahern
2013-10-11 15:20       ` David Ahern
2013-10-11 21:51         ` Andi Kleen
2013-10-11 22:04           ` David Ahern
2013-10-13 10:25             ` Jiri Olsa
2013-10-13 21:18               ` [RFC] perf record,top: Add callchain option into .perfconfig Jiri Olsa
2013-10-13 21:32                 ` Andi Kleen
2013-10-14  7:56               ` [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Ingo Molnar
2013-10-12 16:53         ` Ingo Molnar
2013-10-12 19:42           ` David Ahern
2013-10-13  5:23             ` Ingo Molnar
2013-10-25 19:09               ` RFP: Fixing "-ga -ag -g fp -g dwarf" was " Arnaldo Carvalho de Melo
2013-10-25 19:22                 ` David Ahern
2013-10-25 19:46                   ` Arnaldo Carvalho de Melo
2013-10-26 12:03                     ` Ingo Molnar
2013-10-26 12:35                       ` Jiri Olsa
2013-10-26 14:25                       ` [PATCH 0/4] perf tools: Fix -g option handling Jiri Olsa
2013-10-26 14:25                         ` [PATCH 1/4] perf tools: Split -g and --call-graph for record command Jiri Olsa
2013-10-27 15:30                           ` David Ahern
2013-10-28 17:46                             ` Arnaldo Carvalho de Melo
2013-10-28 18:20                               ` David Ahern
2013-10-29  5:13                                 ` Namhyung Kim
2013-10-28  7:59                           ` Namhyung Kim
2013-10-29 10:18                             ` Jiri Olsa
2013-10-29 12:42                               ` Arnaldo Carvalho de Melo
2013-10-29  8:22                           ` [tip:perf/urgent] perf record: Split -g and --call-graph tip-bot for Jiri Olsa
2013-10-26 14:25                         ` [PATCH 2/4] perf tools: Split -G and --call-graph for top command Jiri Olsa
2013-10-27 15:34                           ` David Ahern
2013-10-28  8:06                             ` Namhyung Kim
2013-10-29  8:22                           ` [tip:perf/urgent] perf top: Split -G and --call-graph tip-bot for Jiri Olsa
2013-10-26 14:25                         ` [PATCH 3/4] perf tools: Add call-graph option support into .perfconfig Jiri Olsa
2013-10-27 15:36                           ` David Ahern
2013-10-28  8:10                           ` Namhyung Kim
2013-10-29 10:18                             ` Jiri Olsa
2013-10-29 12:43                               ` Arnaldo Carvalho de Melo
2013-10-29 12:46                                 ` Ingo Molnar
2013-11-01 15:20                               ` Jiri Olsa
2013-10-26 14:25                         ` [PATCH 4/4] perf tools: Add readable output for callchain debug Jiri Olsa
2013-10-27 15:39                           ` David Ahern
2013-10-26 14:32                         ` [PATCH 0/4] perf tools: Fix -g option handling Jiri Olsa
2013-10-27  6:56                           ` Ingo Molnar
2013-10-29 10:21                             ` Jiri Olsa
2013-10-29 10:25                               ` Ingo Molnar
2013-10-13 12:34 ` [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Jiri Olsa
2013-10-14  1:06   ` Namhyung Kim
2013-10-14  4:50   ` Namhyung Kim
2013-10-14  8:01     ` Jiri Olsa
2013-10-14  8:41       ` Namhyung Kim

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=tip-e369517ce5f796945c6af047b4e8b1d650e03458@git.kernel.org \
    --to=tipbot@zytor.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@redhat.com \
    --cc=fweisbec@gmail.com \
    --cc=hpa@zytor.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-tip-commits@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=namhyung.kim@lge.com \
    --cc=namhyung@kernel.org \
    --cc=paulus@samba.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).