[PATCH/RFC 11/16] perf top: Implement basic parallel processing

linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Namhyung Kim <namhyung@kernel.org>
To: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Jiri Olsa <jolsa@redhat.com>, LKML <linux-kernel@vger.kernel.org>,
	David Ahern <dsahern@gmail.com>,
	Frederic Weisbecker <fweisbec@gmail.com>,
	Andi Kleen <andi@firstfloor.org>,
	Stephane Eranian <eranian@google.com>,
	Adrian Hunter <adrian.hunter@intel.com>
Subject: [PATCH/RFC 11/16] perf top: Implement basic parallel processing
Date: Thu, 10 Dec 2015 16:53:30 +0900	[thread overview]
Message-ID: <1449734015-9148-12-git-send-email-namhyung@kernel.org> (raw)
In-Reply-To: <1449734015-9148-1-git-send-email-namhyung@kernel.org>

This patch changes perf top to process event samples with multiple
threads.  For now, each mmap is read and processed with its own hists by
a dedicated reader thread, and the readers run in parallel.  A single
collector thread then gathers the hist entries and moves them to the
evsel's hists tree.  As usual, a single UI thread displays them.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/builtin-top.c | 172 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 141 insertions(+), 31 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index b62665ce5ea6..a9b7461be4f0 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -831,6 +831,57 @@ static int symbol_filter(struct map *map, struct symbol *sym)
 	return 0;
 }
 
+struct collector_arg {
+	struct perf_top		*top;
+	struct hists		*hists;
+};
+
+static void collect_hists(struct perf_top *top, struct hists *hists)
+{
+	int i, k;
+	struct perf_evsel *evsel;
+
+	for (i = 0, k = 0; i < top->evlist->nr_mmaps; i++) {
+		evlist__for_each(top->evlist, evsel) {
+			struct hists *src_hists = &hists[k++];
+			struct hists *dst_hists = evsel__hists(evsel);
+			struct hist_entry *he;
+			struct rb_root *root;
+			struct rb_node *next;
+
+			root = hists__get_rotate_entries_in(src_hists);
+			next = rb_first(root);
+
+			while (next) {
+				if (session_done())
+					return;
+				he = rb_entry(next, struct hist_entry, rb_node_in);
+				next = rb_next(next);
+
+				rb_erase(&he->rb_node_in, root);
+
+				pthread_mutex_lock(&dst_hists->lock);
+				hists__collapse_insert_entry(dst_hists,
+							     dst_hists->entries_in, he);
+				pthread_mutex_unlock(&dst_hists->lock);
+			}
+			hists__add_stats(dst_hists, src_hists);
+		}
+	}
+}
+
+static void *collect_worker(void *arg)
+{
+	struct collector_arg *carg = arg;
+
+	while (!done) {
+		collect_hists(carg->top, carg->hists);
+		poll(NULL, 0, 100);
+	}
+
+	return NULL;
+}
+
 static int hist_iter__top_callback(struct hist_entry_iter *iter,
 				   struct addr_location *al, bool single,
 				   void *arg)
@@ -847,13 +898,19 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter,
 	return 0;
 }
 
-static void perf_event__process_sample(struct perf_tool *tool,
+struct reader_arg {
+	int			idx;
+	struct perf_top		*top;
+	struct hists		*hists;
+};
+
+static void perf_event__process_sample(struct reader_arg *rarg,
 				       const union perf_event *event,
 				       struct perf_evsel *evsel,
 				       struct perf_sample *sample,
 				       struct machine *machine)
 {
-	struct perf_top *top = container_of(tool, struct perf_top, tool);
+	struct perf_top *top = rarg->top;
 	struct addr_location al;
 	int err;
 
@@ -890,10 +947,10 @@ static void perf_event__process_sample(struct perf_tool *tool,
 		perf_top__request_warning(top, &al, WARN_VMLINUX);
 
 	if (al.sym == NULL || !al.sym->ignore) {
-		struct hists *hists = evsel__hists(evsel);
+		struct hists* hists = &rarg->hists[evsel->idx];
 		struct hist_entry_iter iter = {
 			.evsel		= evsel,
-			.hists 		= evsel__hists(evsel),
+			.hists 		= hists,
 			.sample 	= sample,
 			.add_entry_cb 	= hist_iter__top_callback,
 		};
@@ -915,13 +972,15 @@ static void perf_event__process_sample(struct perf_tool *tool,
 	addr_location__put(&al);
 }
 
-static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
+static void perf_top__mmap_read(struct reader_arg *rarg)
 {
 	struct perf_sample sample;
 	struct perf_evsel *evsel;
+	struct perf_top *top = rarg->top;
 	struct perf_session *session = top->session;
 	union perf_event *event;
 	struct machine *machine;
+	int idx = rarg->idx;
 	u8 origin;
 	int ret;
 
@@ -974,10 +1033,11 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 
 
 		if (event->header.type == PERF_RECORD_SAMPLE) {
-			perf_event__process_sample(&top->tool, event, evsel,
+			perf_event__process_sample(rarg, event, evsel,
 						   &sample, machine);
 		} else if (event->header.type < PERF_RECORD_MAX) {
-			hists__inc_nr_events(evsel__hists(evsel), event->header.type);
+			hists__inc_nr_events(&rarg->hists[evsel->idx],
+					     event->header.type);
 			machine__process_event(machine, event, &sample);
 		} else
 			++session->evlist->stats.nr_unknown_events;
@@ -986,12 +1046,30 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 	}
 }
 
-static void perf_top__mmap_read(struct perf_top *top)
+static void *mmap_read_worker(void *arg)
 {
-	int i;
+	struct reader_arg *rarg = arg;
+	struct perf_top *top = rarg->top;
+
+	if (top->realtime_prio) {
+		struct sched_param param;
+
+		param.sched_priority = top->realtime_prio;
+		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
+			ui__error("Could not set realtime priority.\n");
+			return NULL;
+		}
+	}
+
+	while (!done) {
+		u64 hits = top->samples;
 
-	for (i = 0; i < top->evlist->nr_mmaps; i++)
-		perf_top__mmap_read_idx(top, i);
+		perf_top__mmap_read(rarg);
+
+		if (hits == top->samples)
+			perf_evlist__poll(top->evlist, 100);
+	}
+	return NULL;
 }
 
 static int perf_top__start_counters(struct perf_top *top)
@@ -1052,8 +1130,14 @@ static int perf_top__setup_sample_type(struct perf_top *top __maybe_unused)
 static int __cmd_top(struct perf_top *top)
 {
 	struct record_opts *opts = &top->record_opts;
-	pthread_t thread;
+	pthread_t *readers = NULL;
+	pthread_t collector = (pthread_t) 0;
+	pthread_t ui_thread = (pthread_t) 0;
+	struct hists *hists = NULL;
+	struct reader_arg *rargs = NULL;
+	struct collector_arg carg;
 	int ret;
+	int i;
 
 	top->session = perf_session__new(NULL, false, NULL);
 	if (top->session == NULL)
@@ -1104,37 +1188,63 @@ static int __cmd_top(struct perf_top *top)
 	/* Wait for a minimal set of events before starting the snapshot */
 	perf_evlist__poll(top->evlist, 100);
 
-	perf_top__mmap_read(top);
-
 	ret = -1;
-	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
-							    display_thread), top)) {
-		ui__error("Could not create display thread.\n");
+	readers = calloc(sizeof(pthread_t), top->evlist->nr_mmaps);
+	if (readers == NULL)
 		goto out_delete;
-	}
 
-	if (top->realtime_prio) {
-		struct sched_param param;
+	rargs = calloc(sizeof(*rargs), top->evlist->nr_mmaps);
+	if (rargs == NULL)
+		goto out_free;
 
-		param.sched_priority = top->realtime_prio;
-		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
-			ui__error("Could not set realtime priority.\n");
-			goto out_join;
-		}
+	hists = calloc(sizeof(*hists), top->evlist->nr_mmaps * top->evlist->nr_entries);
+	if (hists == NULL)
+		goto out_free;
+
+	for (i = 0; i < top->evlist->nr_mmaps * top->evlist->nr_entries; i++)
+		__hists__init(&hists[i]);
+
+	for (i = 0; i < top->evlist->nr_mmaps; i++) {
+		struct reader_arg *rarg = &rargs[i];
+
+		rarg->idx = i;
+		rarg->top = top;
+		rarg->hists = &hists[i * top->evlist->nr_entries];
+
+		perf_top__mmap_read(rarg);
 	}
+	collect_hists(top, hists);
 
-	while (!done) {
-		u64 hits = top->samples;
+	for (i = 0; i < top->evlist->nr_mmaps; i++) {
+		if (pthread_create(&readers[i], NULL, mmap_read_worker, &rargs[i]))
+			goto out_join;
+	}
 
-		perf_top__mmap_read(top);
+	carg.top = top;
+	carg.hists = hists;
+	if (pthread_create(&collector, NULL, collect_worker, &carg))
+		goto out_join;
 
-		if (hits == top->samples)
-			ret = perf_evlist__poll(top->evlist, 100);
+	if (pthread_create(&ui_thread, NULL, (use_browser > 0 ? display_thread_tui :
+							        display_thread), top)) {
+		ui__error("Could not create display thread.\n");
+		goto out_join;
 	}
 
 	ret = 0;
+
 out_join:
-	pthread_join(thread, NULL);
+	pthread_join(ui_thread, NULL);
+	pthread_join(collector, NULL);
+	for (i = 0; i < top->evlist->nr_mmaps; i++) {
+		pthread_join(readers[i], NULL);
+	}
+
+out_free:
+	free(hists);
+	free(rargs);
+	free(readers);
+
 out_delete:
 	perf_session__delete(top->session);
 	top->session = NULL;
-- 
2.6.2

next prev parent reply	other threads:[~2015-12-10  7:54 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-12-10  7:53 [PATCHSET 00/16] perf top: Add multi-thread support (v1) Namhyung Kim
2015-12-10  7:53 ` [PATCH/RFC 01/16] perf top: Delete half-processed hist entries when exit Namhyung Kim
2015-12-10  9:55   ` 平松雅巳 / HIRAMATU，MASAMI
2015-12-10 18:57     ` Arnaldo Carvalho de Melo
2015-12-14  8:15   ` [tip:perf/core] " tip-bot for Namhyung Kim
2015-12-10  7:53 ` [PATCH/RFC 02/16] perf top: Fix and cleanup perf_top__record_precise_ip() Namhyung Kim
2015-12-10 19:04   ` Arnaldo Carvalho de Melo
2015-12-11  2:27     ` Namhyung Kim
2015-12-10  7:53 ` [PATCH/RFC 03/16] perf top: Factor out warnings about kernel addresses and symbols Namhyung Kim
2015-12-10 19:07   ` Arnaldo Carvalho de Melo
2015-12-14  1:44     ` Namhyung Kim
2015-12-14  2:02       ` Namhyung Kim
2015-12-10  7:53 ` [PATCH/RFC 04/16] perf top: Factor out warnings in perf_top__record_precise_ip() Namhyung Kim
2015-12-10  7:53 ` [PATCH/RFC 05/16] perf top: Show warning messages in the display thread Namhyung Kim
2015-12-10  7:53 ` [PATCH/RFC 06/16] perf top: Get rid of access to hists->lock in perf_top__record_precise_ip() Namhyung Kim
2015-12-10  7:53 ` [PATCH/RFC 07/16] perf hists: Pass hists struct to hist_entry_iter struct Namhyung Kim
2015-12-13 23:15   ` Jiri Olsa
2015-12-14  1:45     ` Namhyung Kim
2015-12-10  7:53 ` [PATCH/RFC 08/16] perf tools: Export a couple of hist functions Namhyung Kim
2015-12-13 23:17   ` Jiri Olsa
2015-12-10  7:53 ` [PATCH/RFC 09/16] perf tools: Update hist entry's hists pointer Namhyung Kim
2015-12-13 23:23   ` Jiri Olsa
2015-12-13 23:28     ` Jiri Olsa
2015-12-14  1:51       ` Namhyung Kim
2015-12-10  7:53 ` [PATCH/RFC 10/16] perf hist: Add events_stats__add() and hists__add_stats() Namhyung Kim
2015-12-10  7:53 ` Namhyung Kim [this message]
2015-12-14  9:23   ` [PATCH/RFC 11/16] perf top: Implement basic parallel processing Jiri Olsa
2015-12-14  9:35     ` Jiri Olsa
2015-12-15  2:08       ` Namhyung Kim
2015-12-10  7:53 ` [PATCH/RFC 12/16] perf tools: Reduce lock contention when processing events Namhyung Kim
2015-12-14  8:43   ` Jiri Olsa
2015-12-15  2:03     ` Namhyung Kim
2015-12-10  7:53 ` [PATCH/RFC 13/16] perf top: Protect the seen list using mutex Namhyung Kim
2015-12-10  7:53 ` [PATCH/RFC 14/16] perf top: Separate struct perf_top_stats Namhyung Kim
2015-12-10  7:53 ` [PATCH/RFC 15/16] perf top: Add --num-thread option Namhyung Kim
2015-12-10  7:53 ` [PATCH/RFC 16/16] perf tools: Skip dso front cache for multi-threaded lookup Namhyung Kim
2015-12-10  8:01 ` [PATCHSET 00/16] perf top: Add multi-thread support (v1) Ingo Molnar
2015-12-10  8:49   ` Namhyung Kim
2015-12-11  8:11     ` Ingo Molnar
2015-12-11 15:01       ` David Ahern
2015-12-14  1:12         ` Namhyung Kim
2015-12-14  9:26         ` Peter Zijlstra
2015-12-14  9:38           ` Ingo Molnar
2015-12-14 14:55             ` David Ahern
2015-12-14 16:26               ` Arnaldo Carvalho de Melo
2015-12-14 16:41                 ` Peter Zijlstra
2015-12-14 17:52                   ` Arnaldo Carvalho de Melo
2015-12-14 16:38             ` Namhyung Kim
2015-12-14 16:56               ` Peter Zijlstra
2015-12-14 17:11                 ` Namhyung Kim
2015-12-14 14:46           ` David Ahern
2015-12-14 17:06             ` Namhyung Kim
2015-12-14 17:54               ` Arnaldo Carvalho de Melo
2015-12-14 16:25           ` Namhyung Kim
2015-12-14 16:44             ` Peter Zijlstra

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:b62665ce5ea dfblob:a9b7461be4f )
 OR (
bs:"[PATCH/RFC 11/16] perf top: Implement basic parallel processing" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1449734015-9148-12-git-send-email-namhyung@kernel.org \
    --to=namhyung@kernel.org \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=andi@firstfloor.org \
    --cc=dsahern@gmail.com \
    --cc=eranian@google.com \
    --cc=fweisbec@gmail.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).