All of lore.kernel.org
 help / color / mirror / Atom feed
From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Ingo Molnar <mingo@kernel.org>
Cc: Clark Williams <williams@redhat.com>,
	linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
	Davidlohr Bueso <dave@stgolabs.net>,
	Davidlohr Bueso <dbueso@suse.de>, Jiri Olsa <jolsa@kernel.org>,
	Namhyung Kim <namhyung@kernel.org>,
	Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: [PATCH 16/29] perf sched: Use cached rbtrees
Date: Sat, 26 Jan 2019 00:18:30 +0100	[thread overview]
Message-ID: <20190125231843.2895-17-acme@kernel.org> (raw)
In-Reply-To: <20190125231843.2895-1-acme@kernel.org>

From: Davidlohr Bueso <dave@stgolabs.net>

At the cost of an extra pointer, we can avoid the O(logN) cost of
finding the first element in the tree (smallest node), which is
something heavily required for perf-sched.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/20181206191819.30182-8-dave@stgolabs.net
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-sched.c | 45 +++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 20 deletions(-)

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 2e0f0c65964a..640558e9352e 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -213,7 +213,7 @@ struct perf_sched {
 	u64		 all_runtime;
 	u64		 all_count;
 	u64		 cpu_last_switched[MAX_CPUS];
-	struct rb_root	 atom_root, sorted_atom_root, merged_atom_root;
+	struct rb_root_cached atom_root, sorted_atom_root, merged_atom_root;
 	struct list_head sort_list, cmp_pid;
 	bool force;
 	bool skip_merge;
@@ -271,7 +271,7 @@ struct evsel_runtime {
 struct idle_thread_runtime {
 	struct thread_runtime	tr;
 	struct thread		*last_thread;
-	struct rb_root		sorted_root;
+	struct rb_root_cached	sorted_root;
 	struct callchain_root	callchain;
 	struct callchain_cursor	cursor;
 };
@@ -950,10 +950,10 @@ thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *
 }
 
 static struct work_atoms *
-thread_atoms_search(struct rb_root *root, struct thread *thread,
+thread_atoms_search(struct rb_root_cached *root, struct thread *thread,
 			 struct list_head *sort_list)
 {
-	struct rb_node *node = root->rb_node;
+	struct rb_node *node = root->rb_root.rb_node;
 	struct work_atoms key = { .thread = thread };
 
 	while (node) {
@@ -976,10 +976,11 @@ thread_atoms_search(struct rb_root *root, struct thread *thread,
 }
 
 static void
-__thread_latency_insert(struct rb_root *root, struct work_atoms *data,
+__thread_latency_insert(struct rb_root_cached *root, struct work_atoms *data,
 			 struct list_head *sort_list)
 {
-	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL;
+	bool leftmost = true;
 
 	while (*new) {
 		struct work_atoms *this;
@@ -992,12 +993,14 @@ __thread_latency_insert(struct rb_root *root, struct work_atoms *data,
 
 		if (cmp > 0)
 			new = &((*new)->rb_left);
-		else
+		else {
 			new = &((*new)->rb_right);
+			leftmost = false;
+		}
 	}
 
 	rb_link_node(&data->node, parent, new);
-	rb_insert_color(&data->node, root);
+	rb_insert_color_cached(&data->node, root, leftmost);
 }
 
 static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread)
@@ -1447,15 +1450,15 @@ static int sort_dimension__add(const char *tok, struct list_head *list)
 static void perf_sched__sort_lat(struct perf_sched *sched)
 {
 	struct rb_node *node;
-	struct rb_root *root = &sched->atom_root;
+	struct rb_root_cached *root = &sched->atom_root;
 again:
 	for (;;) {
 		struct work_atoms *data;
-		node = rb_first(root);
+		node = rb_first_cached(root);
 		if (!node)
 			break;
 
-		rb_erase(node, root);
+		rb_erase_cached(node, root);
 		data = rb_entry(node, struct work_atoms, node);
 		__thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list);
 	}
@@ -2762,12 +2765,12 @@ static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node)
 	return ret;
 }
 
-static size_t timehist_print_idlehist_callchain(struct rb_root *root)
+static size_t timehist_print_idlehist_callchain(struct rb_root_cached *root)
 {
 	size_t ret = 0;
 	FILE *fp = stdout;
 	struct callchain_node *chain;
-	struct rb_node *rb_node = rb_first(root);
+	struct rb_node *rb_node = rb_first_cached(root);
 
 	printf("  %16s  %8s  %s\n", "Idle time (msec)", "Count", "Callchains");
 	printf("  %.16s  %.8s  %.50s\n", graph_dotted_line, graph_dotted_line,
@@ -2868,7 +2871,7 @@ static void timehist_print_summary(struct perf_sched *sched,
 			if (itr == NULL)
 				continue;
 
-			callchain_param.sort(&itr->sorted_root, &itr->callchain,
+			callchain_param.sort(&itr->sorted_root.rb_root, &itr->callchain,
 					     0, &callchain_param);
 
 			printf("  CPU %2d:", i);
@@ -3074,11 +3077,12 @@ static void print_bad_events(struct perf_sched *sched)
 	}
 }
 
-static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data)
+static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *data)
 {
-	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL;
 	struct work_atoms *this;
 	const char *comm = thread__comm_str(data->thread), *this_comm;
+	bool leftmost = true;
 
 	while (*new) {
 		int cmp;
@@ -3092,6 +3096,7 @@ static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data)
 			new = &((*new)->rb_left);
 		} else if (cmp < 0) {
 			new = &((*new)->rb_right);
+			leftmost = false;
 		} else {
 			this->num_merged++;
 			this->total_runtime += data->total_runtime;
@@ -3109,7 +3114,7 @@ static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data)
 
 	data->num_merged++;
 	rb_link_node(&data->node, parent, new);
-	rb_insert_color(&data->node, root);
+	rb_insert_color_cached(&data->node, root, leftmost);
 }
 
 static void perf_sched__merge_lat(struct perf_sched *sched)
@@ -3120,8 +3125,8 @@ static void perf_sched__merge_lat(struct perf_sched *sched)
 	if (sched->skip_merge)
 		return;
 
-	while ((node = rb_first(&sched->atom_root))) {
-		rb_erase(node, &sched->atom_root);
+	while ((node = rb_first_cached(&sched->atom_root))) {
+		rb_erase_cached(node, &sched->atom_root);
 		data = rb_entry(node, struct work_atoms, node);
 		__merge_work_atoms(&sched->merged_atom_root, data);
 	}
@@ -3143,7 +3148,7 @@ static int perf_sched__lat(struct perf_sched *sched)
 	printf("  Task                  |   Runtime ms  | Switches | Average delay ms | Maximum delay ms | Maximum delay at       |\n");
 	printf(" -----------------------------------------------------------------------------------------------------------------\n");
 
-	next = rb_first(&sched->sorted_atom_root);
+	next = rb_first_cached(&sched->sorted_atom_root);
 
 	while (next) {
 		struct work_atoms *work_list;
-- 
2.20.1

  parent reply	other threads:[~2019-01-25 23:18 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-25 23:18 [GIT PULL 00/29] perf/core improvements and fixes Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 01/29] perf symbols: Move symbol_conf to separate file Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 02/29] perf annotate: Remove lots of headers from annotate.h Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 03/29] perf tools: Move branch structs to branch.h Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 04/29] perf block-range: Add missing headers Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 05/29] perf symbols: Remove include map.h from dso.h Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 06/29] perf symbols: Remove some unnecessary includes from symbol.h Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 07/29] perf namespaces: Remove namespaces.h from .h headers Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 08/29] perf comm: Remove needless headers from comm.h Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 09/29] perf callchain: No need to include perf.h Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 10/29] tools: Update rbtree implementation Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 11/29] perf machine: Use cached rbtrees Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 12/29] perf callchain: " Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 13/29] perf util: Use cached rbtree for rblists Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 14/29] perf symbols: Use cached rbtrees Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 15/29] perf hist: " Arnaldo Carvalho de Melo
2019-01-25 23:18 ` Arnaldo Carvalho de Melo [this message]
2019-01-25 23:18 ` [PATCH 17/29] perf bpf: Fix synthesized PERF_RECORD_KSYMBOL/BPF_EVENT Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 18/29] perf script python: Add trace_context extension module to sys.modules Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 19/29] perf script python: Use PyBytes for attr in trace-event-python Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 20/29] perf script python: Remove explicit shebang from setup.py Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 21/29] perf script python: Remove explicit shebang from tests/attr.c Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 22/29] perf script python: Remove explicit shebang from Python scripts Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 23/29] perf script python: Add Python3 support to tests/attr.py Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 24/29] perf bpf: Add bpf_map() helper Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 25/29] perf bpf: Convert pid_map() to bpf_map() Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 26/29] perf augmented_raw_syscalls: Use bpf_map() Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 27/29] perf trace: Fixup etcsnoop example Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 28/29] perf bpf examples: Convert etcsnoop to use bpf_map() Arnaldo Carvalho de Melo
2019-01-25 23:18 ` [PATCH 29/29] perf augmented_syscalls: Convert to bpf_map() Arnaldo Carvalho de Melo
2019-01-26  9:52 ` [GIT PULL 00/29] perf/core improvements and fixes Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190125231843.2895-17-acme@kernel.org \
    --to=acme@kernel.org \
    --cc=acme@redhat.com \
    --cc=dave@stgolabs.net \
    --cc=dbueso@suse.de \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=namhyung@kernel.org \
    --cc=williams@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.