All of lore.kernel.org
 help / color / mirror / Atom feed
From: Arnaldo Carvalho de Melo <acme@redhat.com>
To: Ingo Molnar <mingo@elte.hu>
Cc: "Frédéric Weisbecker" <fweisbec@gmail.com>,
	"H. Peter Anvin" <hpa@zytor.com>,
	"Peter Zijlstra" <peterz@infradead.org>,
	"Mike Galbraith" <efault@gmx.de>,
	"Linux Kernel Mailing List" <linux-kernel@vger.kernel.org>
Subject: [PATCH 1/1 tip] perf tools: Use rb_tree for maps
Date: Mon, 28 Sep 2009 14:48:46 -0300	[thread overview]
Message-ID: <20090928174846.GA3361@ghostprotocols.net> (raw)

Threads can have many and kernel modules will be represented as a tree
of maps as well.

Ah, and for a perf.data with 146607 samples:

Before:

[root@doppio ~]# perf stat -r 5 perf report > /dev/null

 Performance counter stats for 'perf report' (5 runs):

     699.823680  task-clock-msecs         #      0.991 CPUs    ( +-   0.454% )
             74  context-switches         #      0.000 M/sec   ( +-   1.709% )
              2  CPU-migrations           #      0.000 M/sec   ( +-  17.008% )
          23114  page-faults              #      0.033 M/sec   ( +-   0.000% )
     1381257019  cycles                   #   1973.721 M/sec   ( +-   0.290% )
     1456894438  instructions             #      1.055 IPC     ( +-   0.007% )
       18779818  cache-references         #     26.835 M/sec   ( +-   0.380% )
         641799  cache-misses             #      0.917 M/sec   ( +-   1.200% )

    0.705972729  seconds time elapsed   ( +-   0.501% )

[root@doppio ~]#

After

 Performance counter stats for 'perf report' (5 runs):

     691.261451  task-clock-msecs         #      0.993 CPUs    ( +-   0.307% )
             72  context-switches         #      0.000 M/sec   ( +-   0.829% )
              6  CPU-migrations           #      0.000 M/sec   ( +-  18.409% )
          23127  page-faults              #      0.033 M/sec   ( +-   0.000% )
     1366395876  cycles                   #   1976.670 M/sec   ( +-   0.153% )
     1443136016  instructions             #      1.056 IPC     ( +-   0.012% )
       17956402  cache-references         #     25.976 M/sec   ( +-   0.325% )
         661924  cache-misses             #      0.958 M/sec   ( +-   1.335% )

    0.696127275  seconds time elapsed   ( +-   0.377% )

I.e. we see some speedup too.

Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mike Galbraith <efault@gmx.de>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile      |    1 +
 tools/perf/util/event.h  |    4 +-
 tools/perf/util/thread.c |  129 +++++++++++++++++++++++++++++----------------
 tools/perf/util/thread.h |   12 +++-
 4 files changed, 95 insertions(+), 51 deletions(-)

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 0a9e5ae..17b0757 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -340,6 +340,7 @@ LIB_H += util/module.h
 LIB_H += util/color.h
 LIB_H += util/values.h
 LIB_H += util/sort.h
+LIB_H += util/thread.h
 
 LIB_OBJS += util/abspath.o
 LIB_OBJS += util/alias.o
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index c31a5da..4c69eb5 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -3,7 +3,7 @@
 
 #include "../perf.h"
 #include "util.h"
-#include <linux/list.h>
+#include <linux/rbtree.h>
 
 enum {
 	SHOW_KERNEL	= 1,
@@ -79,7 +79,7 @@ typedef union event_union {
 } event_t;
 
 struct map {
-	struct list_head	node;
+	struct rb_node		rb_node;
 	u64			start;
 	u64			end;
 	u64			pgoff;
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 45efb5d..9d0945c 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -15,7 +15,7 @@ static struct thread *thread__new(pid_t pid)
 		self->comm = malloc(32);
 		if (self->comm)
 			snprintf(self->comm, 32, ":%d", self->pid);
-		INIT_LIST_HEAD(&self->maps);
+		self->maps = RB_ROOT;
 	}
 
 	return self;
@@ -31,11 +31,13 @@ int thread__set_comm(struct thread *self, const char *comm)
 
 static size_t thread__fprintf(struct thread *self, FILE *fp)
 {
-	struct map *pos;
+	struct rb_node *nd;
 	size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm);
 
-	list_for_each_entry(pos, &self->maps, node)
+	for (nd = rb_first(&self->maps); nd; nd = rb_next(nd)) {
+		struct map *pos = rb_entry(nd, struct map, rb_node);
 		ret += map__fprintf(pos, fp);
+	}
 
 	return ret;
 }
@@ -93,42 +95,90 @@ register_idle_thread(struct rb_root *threads, struct thread **last_match)
 	return thread;
 }
 
-void thread__insert_map(struct thread *self, struct map *map)
+static void thread__remove_overlappings(struct thread *self, struct map *map)
 {
-	struct map *pos, *tmp;
-
-	list_for_each_entry_safe(pos, tmp, &self->maps, node) {
-		if (map__overlap(pos, map)) {
-			if (verbose >= 2) {
-				printf("overlapping maps:\n");
-				map__fprintf(map, stdout);
-				map__fprintf(pos, stdout);
-			}
-
-			if (map->start <= pos->start && map->end > pos->start)
-				pos->start = map->end;
-
-			if (map->end >= pos->end && map->start < pos->end)
-				pos->end = map->start;
-
-			if (verbose >= 2) {
-				printf("after collision:\n");
-				map__fprintf(pos, stdout);
-			}
-
-			if (pos->start >= pos->end) {
-				list_del_init(&pos->node);
-				free(pos);
-			}
+	struct rb_node *next = rb_first(&self->maps);
+
+	while (next) {
+		struct map *pos = rb_entry(next, struct map, rb_node);
+		next = rb_next(&pos->rb_node);
+
+		if (!map__overlap(pos, map))
+			continue;
+
+		if (verbose >= 2) {
+			printf("overlapping maps:\n");
+			map__fprintf(map, stdout);
+			map__fprintf(pos, stdout);
+		}
+
+		if (map->start <= pos->start && map->end > pos->start)
+			pos->start = map->end;
+
+		if (map->end >= pos->end && map->start < pos->end)
+			pos->end = map->start;
+
+		if (verbose >= 2) {
+			printf("after collision:\n");
+			map__fprintf(pos, stdout);
+		}
+
+		if (pos->start >= pos->end) {
+			rb_erase(&pos->rb_node, &self->maps);
+			free(pos);
 		}
 	}
+}
+
+void maps__insert(struct rb_root *maps, struct map *map)
+{
+	struct rb_node **p = &maps->rb_node;
+	struct rb_node *parent = NULL;
+	const u64 ip = map->start;
+	struct map *m;
+
+	while (*p != NULL) {
+		parent = *p;
+		m = rb_entry(parent, struct map, rb_node);
+		if (ip < m->start)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&map->rb_node, parent, p);
+	rb_insert_color(&map->rb_node, maps);
+}
+
+struct map *maps__find(struct rb_root *maps, u64 ip)
+{
+	struct rb_node **p = &maps->rb_node;
+	struct rb_node *parent = NULL;
+	struct map *m;
+
+	while (*p != NULL) {
+		parent = *p;
+		m = rb_entry(parent, struct map, rb_node);
+		if (ip < m->start)
+			p = &(*p)->rb_left;
+		else if (ip > m->end)
+			p = &(*p)->rb_right;
+		else
+			return m;
+	}
+
+	return NULL;
+}
 
-	list_add_tail(&map->node, &self->maps);
+void thread__insert_map(struct thread *self, struct map *map)
+{
+	thread__remove_overlappings(self, map);
+	maps__insert(&self->maps, map);
 }
 
 int thread__fork(struct thread *self, struct thread *parent)
 {
-	struct map *map;
+	struct rb_node *nd;
 
 	if (self->comm)
 		free(self->comm);
@@ -136,7 +186,8 @@ int thread__fork(struct thread *self, struct thread *parent)
 	if (!self->comm)
 		return -ENOMEM;
 
-	list_for_each_entry(map, &parent->maps, node) {
+	for (nd = rb_first(&parent->maps); nd; nd = rb_next(nd)) {
+		struct map *map = rb_entry(nd, struct map, rb_node);
 		struct map *new = map__clone(map);
 		if (!new)
 			return -ENOMEM;
@@ -146,20 +197,6 @@ int thread__fork(struct thread *self, struct thread *parent)
 	return 0;
 }
 
-struct map *thread__find_map(struct thread *self, u64 ip)
-{
-	struct map *pos;
-
-	if (self == NULL)
-		return NULL;
-
-	list_for_each_entry(pos, &self->maps, node)
-		if (ip >= pos->start && ip <= pos->end)
-			return pos;
-
-	return NULL;
-}
-
 size_t threads__fprintf(FILE *fp, struct rb_root *threads)
 {
 	size_t ret = 0;
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 693ed1e..bbb37c1 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -2,13 +2,12 @@
 #define __PERF_THREAD_H
 
 #include <linux/rbtree.h>
-#include <linux/list.h>
 #include <unistd.h>
 #include "symbol.h"
 
 struct thread {
 	struct rb_node		rb_node;
-	struct list_head	maps;
+	struct rb_root		maps;
 	pid_t			pid;
 	char			shortname[3];
 	char			*comm;
@@ -21,7 +20,14 @@ struct thread *
 register_idle_thread(struct rb_root *threads, struct thread **last_match);
 void thread__insert_map(struct thread *self, struct map *map);
 int thread__fork(struct thread *self, struct thread *parent);
-struct map *thread__find_map(struct thread *self, u64 ip);
 size_t threads__fprintf(FILE *fp, struct rb_root *threads);
 
+void maps__insert(struct rb_root *maps, struct map *map);
+struct map *maps__find(struct rb_root *maps, u64 ip);
+
+static inline struct map *thread__find_map(struct thread *self, u64 ip)
+{
+	return self ? maps__find(&self->maps, ip) : NULL;
+}
+
 #endif	/* __PERF_THREAD_H */
-- 
1.6.2.5


                 reply	other threads:[~2009-09-28 17:49 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090928174846.GA3361@ghostprotocols.net \
    --to=acme@redhat.com \
    --cc=efault@gmx.de \
    --cc=fweisbec@gmail.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.