All of lore.kernel.org
 help / color / mirror / Atom feed
From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Andi Kleen <andi@firstfloor.org>
Cc: Jiri Olsa <jolsa@kernel.org>,
	linux-kernel@vger.kernel.org, Andi Kleen <ak@linux.intel.com>,
	Namhyung Kim <namhyung@kernel.org>,
	David Ahern <dsahern@gmail.com>, Ingo Molnar <mingo@kernel.org>,
	Wang Nan <wangnan0@huawei.com>
Subject: Re: [PATCH] perf, tools, script: Skip locking overhead in perf script
Date: Tue, 4 Apr 2017 13:17:39 -0300	[thread overview]
Message-ID: <20170404161739.GH12903@kernel.org> (raw)
In-Reply-To: <20170324002807.26101-1-andi@firstfloor.org>

Em Thu, Mar 23, 2017 at 05:28:07PM -0700, Andi Kleen escreveu:
> From: Andi Kleen <ak@linux.intel.com>
> 
> I was tired of seeing ~10% locking overhead in perf script while
> doing larger PT decodes. perf script doesn't need any locking

> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index f53f449d864d..b404644f559a 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c

Huh, should this be builtin-script? Anyway, below would be a more
complete patch, but I couldn't find improvement at least with a 50M
perf.data file with intel_pt data... will try some more later.

- Arnaldo


commit 42ee7ffd3f88047839837d9b35eef23740a86429
Author: Arnaldo Carvalho de Melo <acme@redhat.com>
Date:   Tue Apr 4 13:15:04 2017 -0300

    WIP
    
    Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 46acc8ece41f..e6262d36f0de 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2493,6 +2493,8 @@ int cmd_script(int argc, const char **argv)
 		NULL
 	};
 
+	perf_set_singlethreaded();
+
 	setup_scripting();
 
 	argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage,
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 5c0ea11a8f0a..9319291414a5 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -75,6 +75,7 @@ libperf-y += data.o
 libperf-y += tsc.o
 libperf-y += cloexec.o
 libperf-y += call-path.o
+libperf-y += rwsem.o
 libperf-y += thread-stack.o
 libperf-$(CONFIG_AUXTRACE) += auxtrace.o
 libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index c1870ac365a3..08dd01fd2b44 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -476,7 +476,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
 static void maps__init(struct maps *maps)
 {
 	maps->entries = RB_ROOT;
-	pthread_rwlock_init(&maps->lock, NULL);
+	init_rwsem(&maps->lock);
 }
 
 void map_groups__init(struct map_groups *mg, struct machine *machine)
@@ -505,9 +505,9 @@ static void __maps__purge(struct maps *maps)
 
 static void maps__exit(struct maps *maps)
 {
-	pthread_rwlock_wrlock(&maps->lock);
+	down_write(&maps->lock);
 	__maps__purge(maps);
-	pthread_rwlock_unlock(&maps->lock);
+	up_write(&maps->lock);
 }
 
 void map_groups__exit(struct map_groups *mg)
@@ -574,7 +574,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
 	struct symbol *sym;
 	struct rb_node *nd;
 
-	pthread_rwlock_rdlock(&maps->lock);
+	down_read(&maps->lock);
 
 	for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
 		struct map *pos = rb_entry(nd, struct map, rb_node);
@@ -590,7 +590,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
 
 	sym = NULL;
 out:
-	pthread_rwlock_unlock(&maps->lock);
+	up_read(&maps->lock);
 	return sym;
 }
 
@@ -626,7 +626,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp)
 	size_t printed = 0;
 	struct rb_node *nd;
 
-	pthread_rwlock_rdlock(&maps->lock);
+	down_read(&maps->lock);
 
 	for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
 		struct map *pos = rb_entry(nd, struct map, rb_node);
@@ -638,7 +638,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp)
 		}
 	}
 
-	pthread_rwlock_unlock(&maps->lock);
+	up_read(&maps->lock);
 
 	return printed;
 }
@@ -670,7 +670,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
 	struct rb_node *next;
 	int err = 0;
 
-	pthread_rwlock_wrlock(&maps->lock);
+	down_write(&maps->lock);
 
 	root = &maps->entries;
 	next = rb_first(root);
@@ -738,7 +738,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
 
 	err = 0;
 out:
-	pthread_rwlock_unlock(&maps->lock);
+	up_write(&maps->lock);
 	return err;
 }
 
@@ -759,7 +759,7 @@ int map_groups__clone(struct thread *thread,
 	struct map *map;
 	struct maps *maps = &parent->maps[type];
 
-	pthread_rwlock_rdlock(&maps->lock);
+	down_read(&maps->lock);
 
 	for (map = maps__first(maps); map; map = map__next(map)) {
 		struct map *new = map__clone(map);
@@ -776,7 +776,7 @@ int map_groups__clone(struct thread *thread,
 
 	err = 0;
 out_unlock:
-	pthread_rwlock_unlock(&maps->lock);
+	up_read(&maps->lock);
 	return err;
 }
 
@@ -803,9 +803,9 @@ static void __maps__insert(struct maps *maps, struct map *map)
 
 void maps__insert(struct maps *maps, struct map *map)
 {
-	pthread_rwlock_wrlock(&maps->lock);
+	down_write(&maps->lock);
 	__maps__insert(maps, map);
-	pthread_rwlock_unlock(&maps->lock);
+	up_write(&maps->lock);
 }
 
 static void __maps__remove(struct maps *maps, struct map *map)
@@ -816,9 +816,9 @@ static void __maps__remove(struct maps *maps, struct map *map)
 
 void maps__remove(struct maps *maps, struct map *map)
 {
-	pthread_rwlock_wrlock(&maps->lock);
+	down_write(&maps->lock);
 	__maps__remove(maps, map);
-	pthread_rwlock_unlock(&maps->lock);
+	up_write(&maps->lock);
 }
 
 struct map *maps__find(struct maps *maps, u64 ip)
@@ -826,7 +826,7 @@ struct map *maps__find(struct maps *maps, u64 ip)
 	struct rb_node **p, *parent = NULL;
 	struct map *m;
 
-	pthread_rwlock_rdlock(&maps->lock);
+	down_read(&maps->lock);
 
 	p = &maps->entries.rb_node;
 	while (*p != NULL) {
@@ -842,7 +842,7 @@ struct map *maps__find(struct maps *maps, u64 ip)
 
 	m = NULL;
 out:
-	pthread_rwlock_unlock(&maps->lock);
+	up_read(&maps->lock);
 	return m;
 }
 
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index c8a5a644c0a9..fd76534a7807 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -9,6 +9,7 @@
 #include <stdio.h>
 #include <stdbool.h>
 #include <linux/types.h>
+#include "rwsem.h"
 
 enum map_type {
 	MAP__FUNCTION = 0,
@@ -61,7 +62,7 @@ struct kmap {
 
 struct maps {
 	struct rb_root	 entries;
-	pthread_rwlock_t lock;
+	struct rw_semaphore lock;
 };
 
 struct map_groups {
diff --git a/tools/perf/util/rwsem.c b/tools/perf/util/rwsem.c
new file mode 100644
index 000000000000..02b0f18b96eb
--- /dev/null
+++ b/tools/perf/util/rwsem.c
@@ -0,0 +1,27 @@
+#include "util.h"
+#include "rwsem.h"
+
+int init_rwsem(struct rw_semaphore *sem)
+{
+	return perf_singlethreaded ? 0 : pthread_rwlock_init(&sem->lock, NULL);
+}
+
+int down_read(struct rw_semaphore *sem)
+{
+	return perf_singlethreaded ? 0 : pthread_rwlock_rdlock(&sem->lock);
+}
+
+int up_read(struct rw_semaphore *sem)
+{
+	return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock);
+}
+
+int down_write(struct rw_semaphore *sem)
+{
+	return perf_singlethreaded ? 0 : pthread_rwlock_wrlock(&sem->lock);
+}
+
+int up_write(struct rw_semaphore *sem)
+{
+	return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock);
+}
diff --git a/tools/perf/util/rwsem.h b/tools/perf/util/rwsem.h
new file mode 100644
index 000000000000..c06afd42dc11
--- /dev/null
+++ b/tools/perf/util/rwsem.h
@@ -0,0 +1,18 @@
+#ifndef _PERF_RWSEM_H
+#define _PERF_RWSEM_H
+
+#include <pthread.h>
+
+struct rw_semaphore {
+	pthread_rwlock_t lock;
+};
+
+int init_rwsem(struct rw_semaphore *sem);
+
+int down_read(struct rw_semaphore *sem);
+int up_read(struct rw_semaphore *sem);
+
+int down_write(struct rw_semaphore *sem);
+int up_write(struct rw_semaphore *sem);
+
+#endif /* _PERF_RWSEM_H */
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 9b4d8ba22fed..d3caa5e5f845 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -210,7 +210,7 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
 	struct maps *maps = &mg->maps[type];
 	struct map *next, *curr;
 
-	pthread_rwlock_wrlock(&maps->lock);
+	down_write(&maps->lock);
 
 	curr = maps__first(maps);
 	if (curr == NULL)
@@ -228,7 +228,7 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
 	curr->end = ~0ULL;
 
 out_unlock:
-	pthread_rwlock_unlock(&maps->lock);
+	up_write(&maps->lock);
 }
 
 struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name)
@@ -1561,7 +1561,7 @@ struct map *map_groups__find_by_name(struct map_groups *mg,
 	struct maps *maps = &mg->maps[type];
 	struct map *map;
 
-	pthread_rwlock_rdlock(&maps->lock);
+	down_read(&maps->lock);
 
 	for (map = maps__first(maps); map; map = map__next(map)) {
 		if (map->dso && strcmp(map->dso->short_name, name) == 0)
@@ -1571,7 +1571,7 @@ struct map *map_groups__find_by_name(struct map_groups *mg,
 	map = NULL;
 
 out_unlock:
-	pthread_rwlock_unlock(&maps->lock);
+	up_read(&maps->lock);
 	return map;
 }
 
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index dcdb87a5d0a1..4da6e776a189 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -259,7 +259,7 @@ static int __thread__prepare_access(struct thread *thread)
 		struct maps *maps = &thread->mg->maps[i];
 		struct map *map;
 
-		pthread_rwlock_rdlock(&maps->lock);
+		down_read(&maps->lock);
 
 		for (map = maps__first(maps); map; map = map__next(map)) {
 			err = unwind__prepare_access(thread, map, &initialized);
@@ -267,7 +267,7 @@ static int __thread__prepare_access(struct thread *thread)
 				break;
 		}
 
-		pthread_rwlock_unlock(&maps->lock);
+		up_read(&maps->lock);
 	}
 
 	return err;
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index d8b45cea54d0..a016880a55e6 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -20,6 +20,13 @@
 #include "callchain.h"
 #include "strlist.h"
 
+bool perf_singlethreaded;
+
+void perf_set_singlethreaded(void)
+{
+	perf_singlethreaded = true;
+}
+
 #define CALLCHAIN_PARAM_DEFAULT			\
 	.mode		= CHAIN_GRAPH_ABS,	\
 	.min_percent	= 0.5,			\
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 7cf5752b38fd..e59aa319a2e1 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -380,4 +380,7 @@ struct inline_node {
 struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr);
 void inline_node__delete(struct inline_node *node);
 
+extern bool perf_singlethreaded;
+void perf_set_singlethreaded(void);
+
 #endif /* GIT_COMPAT_UTIL_H */

  reply	other threads:[~2017-04-04 16:17 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-03-24  0:28 [PATCH] perf, tools, script: Skip locking overhead in perf script Andi Kleen
2017-04-04 16:17 ` Arnaldo Carvalho de Melo [this message]
2017-09-22 16:46   ` [tip:perf/core] perf tools: Provide mutex wrappers for pthreads rwlocks tip-bot for Arnaldo Carvalho de Melo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170404161739.GH12903@kernel.org \
    --to=acme@kernel.org \
    --cc=ak@linux.intel.com \
    --cc=andi@firstfloor.org \
    --cc=dsahern@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=namhyung@kernel.org \
    --cc=wangnan0@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.