All of lore.kernel.org
 help / color / mirror / Atom feed
From: tip-bot for Kan Liang <tipbot@zytor.com>
To: linux-tip-commits@vger.kernel.org
Cc: mingo@kernel.org, ak@linux.intel.com, lukasz.odzioba@intel.com,
	kan.liang@intel.com, peterz@infradead.org, jolsa@kernel.org,
	linux-kernel@vger.kernel.org, hpa@zytor.com, namhyung@kernel.org,
	adrian.hunter@intel.com, acme@redhat.com, tglx@linutronix.de
Subject: [tip:perf/core] perf machine: Use hashtable for machine threads
Date: Fri, 22 Sep 2017 09:42:29 -0700	[thread overview]
Message-ID: <tip-91e467bc568f15da2eac688e131010601e889184@git.kernel.org> (raw)
In-Reply-To: <1505096603-215017-2-git-send-email-kan.liang@intel.com>

Commit-ID:  91e467bc568f15da2eac688e131010601e889184
Gitweb:     http://git.kernel.org/tip/91e467bc568f15da2eac688e131010601e889184
Author:     Kan Liang <kan.liang@intel.com>
AuthorDate: Sun, 10 Sep 2017 19:23:14 -0700
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Mon, 18 Sep 2017 09:40:19 -0300

perf machine: Use hashtable for machine threads

To process any events, it needs to find the thread in the machine first.
The machine maintains a rb tree to store all threads. The rb tree is
protected by a rw lock.

It is not a problem for current perf which serially processing events.
However, it will have scalability performance issue to process events in
parallel, especially on a heavy load system which have many threads.

Introduce a hashtable to divide the big rb tree into many samll rb tree
for threads. The index is thread id % hashtable size. It can reduce the
lock contention.

Committer notes:

Renamed some variables and function names to reduce semantic confusion:

  'struct threads' pointers: thread -> threads
  threads hastable index: tid -> hash_bucket
  struct threads *machine__thread() -> machine__threads()
  Cast tid to (unsigned int) to handle -1 in machine__threads() (Kan Liang)

Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Lukasz Odzioba <lukasz.odzioba@intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1505096603-215017-2-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c  |  19 ++++---
 tools/perf/util/machine.c   | 136 +++++++++++++++++++++++++++-----------------
 tools/perf/util/machine.h   |  23 ++++++--
 tools/perf/util/rb_resort.h |   5 +-
 4 files changed, 117 insertions(+), 66 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 771ddab..ee8c6e8 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2730,20 +2730,23 @@ DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_event
 
 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
 {
-	DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
 	size_t printed = trace__fprintf_threads_header(fp);
 	struct rb_node *nd;
+	int i;
 
-	if (threads == NULL) {
-		fprintf(fp, "%s", "Error sorting output by nr_events!\n");
-		return 0;
-	}
+	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+		DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
 
-	resort_rb__for_each_entry(nd, threads)
-		printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
+		if (threads == NULL) {
+			fprintf(fp, "%s", "Error sorting output by nr_events!\n");
+			return 0;
+		}
 
-	resort_rb__delete(threads);
+		resort_rb__for_each_entry(nd, threads)
+			printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
 
+		resort_rb__delete(threads);
+	}
 	return printed;
 }
 
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index df70936..f4f9267 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -33,6 +33,20 @@ static void dsos__init(struct dsos *dsos)
 	pthread_rwlock_init(&dsos->lock, NULL);
 }
 
+static void machine__threads_init(struct machine *machine)
+{
+	int i;
+
+	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+		struct threads *threads = &machine->threads[i];
+		threads->entries = RB_ROOT;
+		pthread_rwlock_init(&threads->lock, NULL);
+		threads->nr = 0;
+		INIT_LIST_HEAD(&threads->dead);
+		threads->last_match = NULL;
+	}
+}
+
 int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
 {
 	memset(machine, 0, sizeof(*machine));
@@ -40,11 +54,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
 	RB_CLEAR_NODE(&machine->rb_node);
 	dsos__init(&machine->dsos);
 
-	machine->threads = RB_ROOT;
-	pthread_rwlock_init(&machine->threads_lock, NULL);
-	machine->nr_threads = 0;
-	INIT_LIST_HEAD(&machine->dead_threads);
-	machine->last_match = NULL;
+	machine__threads_init(machine);
 
 	machine->vdso_info = NULL;
 	machine->env = NULL;
@@ -141,27 +151,37 @@ static void dsos__exit(struct dsos *dsos)
 void machine__delete_threads(struct machine *machine)
 {
 	struct rb_node *nd;
+	int i;
 
-	pthread_rwlock_wrlock(&machine->threads_lock);
-	nd = rb_first(&machine->threads);
-	while (nd) {
-		struct thread *t = rb_entry(nd, struct thread, rb_node);
+	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+		struct threads *threads = &machine->threads[i];
+		pthread_rwlock_wrlock(&threads->lock);
+		nd = rb_first(&threads->entries);
+		while (nd) {
+			struct thread *t = rb_entry(nd, struct thread, rb_node);
 
-		nd = rb_next(nd);
-		__machine__remove_thread(machine, t, false);
+			nd = rb_next(nd);
+			__machine__remove_thread(machine, t, false);
+		}
+		pthread_rwlock_unlock(&threads->lock);
 	}
-	pthread_rwlock_unlock(&machine->threads_lock);
 }
 
 void machine__exit(struct machine *machine)
 {
+	int i;
+
 	machine__destroy_kernel_maps(machine);
 	map_groups__exit(&machine->kmaps);
 	dsos__exit(&machine->dsos);
 	machine__exit_vdso(machine);
 	zfree(&machine->root_dir);
 	zfree(&machine->current_tid);
-	pthread_rwlock_destroy(&machine->threads_lock);
+
+	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+		struct threads *threads = &machine->threads[i];
+		pthread_rwlock_destroy(&threads->lock);
+	}
 }
 
 void machine__delete(struct machine *machine)
@@ -382,7 +402,8 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
 						  pid_t pid, pid_t tid,
 						  bool create)
 {
-	struct rb_node **p = &machine->threads.rb_node;
+	struct threads *threads = machine__threads(machine, tid);
+	struct rb_node **p = &threads->entries.rb_node;
 	struct rb_node *parent = NULL;
 	struct thread *th;
 
@@ -391,14 +412,14 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
 	 * so most of the time we dont have to look up
 	 * the full rbtree:
 	 */
-	th = machine->last_match;
+	th = threads->last_match;
 	if (th != NULL) {
 		if (th->tid == tid) {
 			machine__update_thread_pid(machine, th, pid);
 			return thread__get(th);
 		}
 
-		machine->last_match = NULL;
+		threads->last_match = NULL;
 	}
 
 	while (*p != NULL) {
@@ -406,7 +427,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
 		th = rb_entry(parent, struct thread, rb_node);
 
 		if (th->tid == tid) {
-			machine->last_match = th;
+			threads->last_match = th;
 			machine__update_thread_pid(machine, th, pid);
 			return thread__get(th);
 		}
@@ -423,7 +444,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
 	th = thread__new(pid, tid);
 	if (th != NULL) {
 		rb_link_node(&th->rb_node, parent, p);
-		rb_insert_color(&th->rb_node, &machine->threads);
+		rb_insert_color(&th->rb_node, &threads->entries);
 
 		/*
 		 * We have to initialize map_groups separately
@@ -434,7 +455,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
 		 * leader and that would screwed the rb tree.
 		 */
 		if (thread__init_map_groups(th, machine)) {
-			rb_erase_init(&th->rb_node, &machine->threads);
+			rb_erase_init(&th->rb_node, &threads->entries);
 			RB_CLEAR_NODE(&th->rb_node);
 			thread__put(th);
 			return NULL;
@@ -443,8 +464,8 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
 		 * It is now in the rbtree, get a ref
 		 */
 		thread__get(th);
-		machine->last_match = th;
-		++machine->nr_threads;
+		threads->last_match = th;
+		++threads->nr;
 	}
 
 	return th;
@@ -458,21 +479,24 @@ struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid
 struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
 				       pid_t tid)
 {
+	struct threads *threads = machine__threads(machine, tid);
 	struct thread *th;
 
-	pthread_rwlock_wrlock(&machine->threads_lock);
+	pthread_rwlock_wrlock(&threads->lock);
 	th = __machine__findnew_thread(machine, pid, tid);
-	pthread_rwlock_unlock(&machine->threads_lock);
+	pthread_rwlock_unlock(&threads->lock);
 	return th;
 }
 
 struct thread *machine__find_thread(struct machine *machine, pid_t pid,
 				    pid_t tid)
 {
+	struct threads *threads = machine__threads(machine, tid);
 	struct thread *th;
-	pthread_rwlock_rdlock(&machine->threads_lock);
+
+	pthread_rwlock_rdlock(&threads->lock);
 	th =  ____machine__findnew_thread(machine, pid, tid, false);
-	pthread_rwlock_unlock(&machine->threads_lock);
+	pthread_rwlock_unlock(&threads->lock);
 	return th;
 }
 
@@ -719,21 +743,24 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
 
 size_t machine__fprintf(struct machine *machine, FILE *fp)
 {
-	size_t ret;
 	struct rb_node *nd;
+	size_t ret;
+	int i;
 
-	pthread_rwlock_rdlock(&machine->threads_lock);
-
-	ret = fprintf(fp, "Threads: %u\n", machine->nr_threads);
+	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+		struct threads *threads = &machine->threads[i];
+		pthread_rwlock_rdlock(&threads->lock);
 
-	for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
-		struct thread *pos = rb_entry(nd, struct thread, rb_node);
+		ret = fprintf(fp, "Threads: %u\n", threads->nr);
 
-		ret += thread__fprintf(pos, fp);
-	}
+		for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
+			struct thread *pos = rb_entry(nd, struct thread, rb_node);
 
-	pthread_rwlock_unlock(&machine->threads_lock);
+			ret += thread__fprintf(pos, fp);
+		}
 
+		pthread_rwlock_unlock(&threads->lock);
+	}
 	return ret;
 }
 
@@ -1479,23 +1506,25 @@ out_problem:
 
 static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock)
 {
-	if (machine->last_match == th)
-		machine->last_match = NULL;
+	struct threads *threads = machine__threads(machine, th->tid);
+
+	if (threads->last_match == th)
+		threads->last_match = NULL;
 
 	BUG_ON(refcount_read(&th->refcnt) == 0);
 	if (lock)
-		pthread_rwlock_wrlock(&machine->threads_lock);
-	rb_erase_init(&th->rb_node, &machine->threads);
+		pthread_rwlock_wrlock(&threads->lock);
+	rb_erase_init(&th->rb_node, &threads->entries);
 	RB_CLEAR_NODE(&th->rb_node);
-	--machine->nr_threads;
+	--threads->nr;
 	/*
 	 * Move it first to the dead_threads list, then drop the reference,
 	 * if this is the last reference, then the thread__delete destructor
 	 * will be called and we will remove it from the dead_threads list.
 	 */
-	list_add_tail(&th->node, &machine->dead_threads);
+	list_add_tail(&th->node, &threads->dead);
 	if (lock)
-		pthread_rwlock_unlock(&machine->threads_lock);
+		pthread_rwlock_unlock(&threads->lock);
 	thread__put(th);
 }
 
@@ -2140,21 +2169,26 @@ int machine__for_each_thread(struct machine *machine,
 			     int (*fn)(struct thread *thread, void *p),
 			     void *priv)
 {
+	struct threads *threads;
 	struct rb_node *nd;
 	struct thread *thread;
 	int rc = 0;
+	int i;
 
-	for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
-		thread = rb_entry(nd, struct thread, rb_node);
-		rc = fn(thread, priv);
-		if (rc != 0)
-			return rc;
-	}
+	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+		threads = &machine->threads[i];
+		for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
+			thread = rb_entry(nd, struct thread, rb_node);
+			rc = fn(thread, priv);
+			if (rc != 0)
+				return rc;
+		}
 
-	list_for_each_entry(thread, &machine->dead_threads, node) {
-		rc = fn(thread, priv);
-		if (rc != 0)
-			return rc;
+		list_for_each_entry(thread, &threads->dead, node) {
+			rc = fn(thread, priv);
+			if (rc != 0)
+				return rc;
+		}
 	}
 	return rc;
 }
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 3cdb134..fe2f058 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -23,6 +23,17 @@ extern const char *ref_reloc_sym_names[];
 
 struct vdso_info;
 
+#define THREADS__TABLE_BITS	8
+#define THREADS__TABLE_SIZE	(1 << THREADS__TABLE_BITS)
+
+struct threads {
+	struct rb_root	  entries;
+	pthread_rwlock_t  lock;
+	unsigned int	  nr;
+	struct list_head  dead;
+	struct thread	  *last_match;
+};
+
 struct machine {
 	struct rb_node	  rb_node;
 	pid_t		  pid;
@@ -30,11 +41,7 @@ struct machine {
 	bool		  comm_exec;
 	bool		  kptr_restrict_warned;
 	char		  *root_dir;
-	struct rb_root	  threads;
-	pthread_rwlock_t  threads_lock;
-	unsigned int	  nr_threads;
-	struct list_head  dead_threads;
-	struct thread	  *last_match;
+	struct threads    threads[THREADS__TABLE_SIZE];
 	struct vdso_info  *vdso_info;
 	struct perf_env   *env;
 	struct dsos	  dsos;
@@ -48,6 +55,12 @@ struct machine {
 	};
 };
 
+static inline struct threads *machine__threads(struct machine *machine, pid_t tid)
+{
+	/* Cast it to handle tid == -1 */
+	return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE];
+}
+
 static inline
 struct map *__machine__kernel_map(struct machine *machine, enum map_type type)
 {
diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
index 808cc45..b30746f 100644
--- a/tools/perf/util/rb_resort.h
+++ b/tools/perf/util/rb_resort.h
@@ -143,7 +143,8 @@ struct __name##_sorted *__name = __name##_sorted__new
 				  __ilist->rblist.nr_entries)
 
 /* For 'struct machine->threads' */
-#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine)			\
-	DECLARE_RESORT_RB(__name)(&__machine->threads, __machine->nr_threads)
+#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket)	\
+	DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries,	\
+				  __machine->threads[hash_bucket].nr)
 
 #endif /* _PERF_RESORT_RB_H_ */

  parent reply	other threads:[~2017-09-22 16:46 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-09-11  2:23 [PATCH RFC V2 00/10] perf top optimization kan.liang
2017-09-11  2:23 ` [PATCH RFC V2 01/10] perf tools: hashtable for machine threads kan.liang
2017-09-13 13:29   ` Arnaldo Carvalho de Melo
2017-09-22 16:42   ` tip-bot for Kan Liang [this message]
2017-09-11  2:23 ` [PATCH RFC V2 02/10] perf tools: using scandir to replace readdir kan.liang
2017-09-13 15:19   ` Arnaldo Carvalho de Melo
2017-09-11  2:23 ` [PATCH RFC V2 03/10] petf tools: using comm_str to replace comm in hist_entry kan.liang
2017-09-13 15:21   ` Arnaldo Carvalho de Melo
2017-09-18  8:38     ` Jiri Olsa
2017-09-11  2:23 ` [PATCH RFC V2 04/10] petf tools: introduce a new function to set namespaces id kan.liang
2017-09-11  2:23 ` [PATCH RFC V2 05/10] perf tools: lock to protect thread list kan.liang
2017-09-18  8:50   ` Jiri Olsa
2017-09-18 16:18     ` Liang, Kan
2017-09-11  2:23 ` [PATCH RFC V2 06/10] perf tools: lock to protect comm_str rb tree kan.liang
2017-09-11  2:23 ` [PATCH RFC V2 07/10] perf tools: change machine comm_exec type to atomic kan.liang
2017-09-13 15:24   ` Arnaldo Carvalho de Melo
2017-09-15 20:05     ` Liang, Kan
2017-09-18 11:30       ` Jiri Olsa
2017-09-11  2:23 ` [PATCH RFC V2 08/10] perf top: implement multithreading for perf_event__synthesize_threads kan.liang
2017-09-18 11:24   ` Jiri Olsa
2017-09-11  2:23 ` [PATCH RFC V2 09/10] perf top: add option to set the number of thread for event synthesize kan.liang
2017-09-11  2:23 ` [PATCH RFC V2 10/10] perf top: switch back to overwrite mode kan.liang
2017-09-13 15:25 ` [PATCH RFC V2 00/10] perf top optimization Arnaldo Carvalho de Melo
2017-09-13 15:29   ` Liang, Kan
2017-09-13 15:38     ` Arnaldo Carvalho de Melo
2017-09-14 21:19       ` Arnaldo Carvalho de Melo
2017-09-15 15:11         ` Liang, Kan
2017-09-15 17:26           ` Arnaldo Carvalho de Melo
2017-09-15 17:29             ` Liang, Kan
2017-09-15 18:24               ` Arnaldo Carvalho de Melo
2017-09-15 18:26                 ` Liang, Kan
2017-09-18  8:57 ` Jiri Olsa
2017-09-18 13:01   ` Arnaldo Carvalho de Melo
2017-09-18 16:21     ` Liang, Kan
2017-09-19  8:19     ` Jiri Olsa
2017-09-19 12:39       ` Liang, Kan
2017-09-19 14:24         ` Arnaldo Carvalho de Melo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=tip-91e467bc568f15da2eac688e131010601e889184@git.kernel.org \
    --to=tipbot@zytor.com \
    --cc=acme@redhat.com \
    --cc=adrian.hunter@intel.com \
    --cc=ak@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=jolsa@kernel.org \
    --cc=kan.liang@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-tip-commits@vger.kernel.org \
    --cc=lukasz.odzioba@intel.com \
    --cc=mingo@kernel.org \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.