linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 8/8] perf tools: Get current comm instead of last one
  2013-09-26  8:58 [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v4) Namhyung Kim
@ 2013-09-26  8:58 ` Namhyung Kim
  2013-10-02 10:01   ` Frederic Weisbecker
  0 siblings, 1 reply; 91+ messages in thread
From: Namhyung Kim @ 2013-09-26  8:58 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Peter Zijlstra, Paul Mackerras, Ingo Molnar, Namhyung Kim, LKML,
	Linus Torvalds, Frederic Weisbecker, Jiri Olsa,
	Arnaldo Carvalho de Melo

From: Namhyung Kim <namhyung.kim@lge.com>

At insert time, a hist entry should reference comm at the time
otherwise it'll get the last comm anyway.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-n6pykiiymtgmcjs834go2t8x@git.kernel.org
[ Fixed up const pointer issues ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/comm.c   | 15 +++++++++++++++
 tools/perf/util/comm.h   |  1 +
 tools/perf/util/hist.c   |  3 +++
 tools/perf/util/sort.c   | 14 +++++---------
 tools/perf/util/sort.h   |  1 +
 tools/perf/util/thread.c |  6 +++---
 tools/perf/util/thread.h |  2 ++
 7 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
index 0cfb55202537..ecd3c9938d3e 100644
--- a/tools/perf/util/comm.c
+++ b/tools/perf/util/comm.c
@@ -94,6 +94,21 @@ struct comm *comm__new(const char *str, u64 timestamp)
 	return comm;
 }
 
+void comm__override(struct comm *comm, const char *str, u64 timestamp)
+{
+	struct comm_str *old = comm->comm_str;
+
+	comm->comm_str = comm_str_findnew(str, &comm_str_root);
+	if (!comm->comm_str) {
+		comm->comm_str = old;
+		return;
+	}
+
+	comm->start = timestamp;
+	comm_str_get(comm->comm_str);
+	comm_str_put(old);
+}
+
 void comm__free(struct comm *comm)
 {
 	comm_str_put(comm->comm_str);
diff --git a/tools/perf/util/comm.h b/tools/perf/util/comm.h
index f62d215bede2..b2c364b37468 100644
--- a/tools/perf/util/comm.h
+++ b/tools/perf/util/comm.h
@@ -16,5 +16,6 @@ struct comm {
 void comm__free(struct comm *comm);
 struct comm *comm__new(const char *str, u64 timestamp);
 const char *comm__str(const struct comm *comm);
+void comm__override(struct comm *self, const char *str, u64 timestamp);
 
 #endif  /* __PERF_COMM_H */
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index a2d58e7abdcf..1aa97b012a09 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -412,6 +412,7 @@ struct hist_entry *__hists__add_mem_entry(struct hists *self,
 {
 	struct hist_entry entry = {
 		.thread	= al->thread,
+		.comm = curr_comm(al->thread),
 		.ms = {
 			.map	= al->map,
 			.sym	= al->sym,
@@ -442,6 +443,7 @@ struct hist_entry *__hists__add_branch_entry(struct hists *self,
 {
 	struct hist_entry entry = {
 		.thread	= al->thread,
+		.comm = curr_comm(al->thread),
 		.ms = {
 			.map	= bi->to.map,
 			.sym	= bi->to.sym,
@@ -471,6 +473,7 @@ struct hist_entry *__hists__add_entry(struct hists *self,
 {
 	struct hist_entry entry = {
 		.thread	= al->thread,
+		.comm = curr_comm(al->thread),
 		.ms = {
 			.map	= al->map,
 			.sym	= al->sym,
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 48e6a38132c9..eefc0e623966 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1,5 +1,6 @@
 #include "sort.h"
 #include "hist.h"
+#include "comm.h"
 #include "symbol.h"
 
 regex_t		parent_regex;
@@ -80,25 +81,20 @@ static int64_t
 sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
 {
 	/* Compare the addr that should be unique among comm */
-	return thread__comm_curr(right->thread) - thread__comm_curr(left->thread);
+	return comm__str(right->comm) - comm__str(left->comm);
 }
 
 static int64_t
 sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
 {
-	const char *comm_l = thread__comm_curr(left->thread);
-	const char *comm_r = thread__comm_curr(right->thread);
-
-	if (!comm_l || !comm_r)
-		return cmp_null(comm_l, comm_r);
-
-	return strcmp(comm_l, comm_r);
+	/* Compare the addr that should be unique among comm */
+	return comm__str(right->comm) - comm__str(left->comm);
 }
 
 static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf,
 				     size_t size, unsigned int width)
 {
-	return repsep_snprintf(bf, size, "%*s", width, thread__comm_curr(self->thread));
+	return repsep_snprintf(bf, size, "%*s", width, comm__str(self->comm));
 }
 
 struct sort_entry sort_comm = {
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 4e80dbd271e7..4d0153f83e3c 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -84,6 +84,7 @@ struct hist_entry {
 	struct he_stat		stat;
 	struct map_symbol	ms;
 	struct thread		*thread;
+	struct comm		*comm;
 	u64			ip;
 	s32			cpu;
 
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 9cb01566eff1..444de7a8527f 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -54,7 +54,7 @@ void thread__delete(struct thread *self)
 	free(self);
 }
 
-static struct comm *curr_comm(const struct thread *thread)
+struct comm *curr_comm(const struct thread *thread)
 {
 	if (list_empty(&thread->comm_list))
 		return NULL;
@@ -69,8 +69,8 @@ int thread__set_comm(struct thread *thread, const char *str, u64 timestamp)
 
 	/* Override latest entry if it had no specific time coverage */
 	if (!curr->start) {
-		list_del(&curr->list);
-		comm__free(curr);
+		comm__override(curr, str, timestamp);
+		return 0;
 	}
 
 	new = comm__new(str, timestamp);
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 0ec37bae88d0..196c6c2dfbe1 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -26,6 +26,7 @@ struct thread {
 };
 
 struct machine;
+struct comm;
 
 struct thread *thread__new(pid_t pid, pid_t tid);
 void thread__delete(struct thread *self);
@@ -36,6 +37,7 @@ static inline void thread__exited(struct thread *thread)
 
 int thread__set_comm(struct thread *self, const char *comm, u64 timestamp);
 int thread__comm_len(struct thread *self);
+struct comm *curr_comm(const struct thread *self);
 const char *thread__comm_curr(const struct thread *thread);
 void thread__insert_map(struct thread *self, struct map *map);
 int thread__fork(struct thread *self, struct thread *parent, u64 timestamp);
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 91+ messages in thread

* Re: [PATCH 8/8] perf tools: Get current comm instead of last one
  2013-09-26  8:58 ` [PATCH 8/8] perf tools: Get current comm instead of last one Namhyung Kim
@ 2013-10-02 10:01   ` Frederic Weisbecker
  2013-10-08  1:56     ` Namhyung Kim
  0 siblings, 1 reply; 91+ messages in thread
From: Frederic Weisbecker @ 2013-10-02 10:01 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds, Jiri Olsa,
	Arnaldo Carvalho de Melo

On Thu, Sep 26, 2013 at 05:58:10PM +0900, Namhyung Kim wrote:
> From: Namhyung Kim <namhyung.kim@lge.com>
> 
> At insert time, a hist entry should reference comm at the time
> otherwise it'll get the last comm anyway.
> 
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> Cc: Frederic Weisbecker <fweisbec@gmail.com>
> Link: http://lkml.kernel.org/n/tip-n6pykiiymtgmcjs834go2t8x@git.kernel.org
> [ Fixed up const pointer issues ]
> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
> ---
>  tools/perf/util/comm.c   | 15 +++++++++++++++
>  tools/perf/util/comm.h   |  1 +
>  tools/perf/util/hist.c   |  3 +++
>  tools/perf/util/sort.c   | 14 +++++---------
>  tools/perf/util/sort.h   |  1 +
>  tools/perf/util/thread.c |  6 +++---
>  tools/perf/util/thread.h |  2 ++
>  7 files changed, 30 insertions(+), 12 deletions(-)
> 
> diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
> index 0cfb55202537..ecd3c9938d3e 100644
> --- a/tools/perf/util/comm.c
> +++ b/tools/perf/util/comm.c
> @@ -94,6 +94,21 @@ struct comm *comm__new(const char *str, u64 timestamp)
>  	return comm;
>  }
>  
> +void comm__override(struct comm *comm, const char *str, u64 timestamp)
> +{
> +	struct comm_str *old = comm->comm_str;
> +
> +	comm->comm_str = comm_str_findnew(str, &comm_str_root);
> +	if (!comm->comm_str) {
> +		comm->comm_str = old;
> +		return;
> +	}
> +
> +	comm->start = timestamp;
> +	comm_str_get(comm->comm_str);
> +	comm_str_put(old);
> +}
> +
>  void comm__free(struct comm *comm)
>  {
>  	comm_str_put(comm->comm_str);
> diff --git a/tools/perf/util/comm.h b/tools/perf/util/comm.h
> index f62d215bede2..b2c364b37468 100644
> --- a/tools/perf/util/comm.h
> +++ b/tools/perf/util/comm.h
> @@ -16,5 +16,6 @@ struct comm {
>  void comm__free(struct comm *comm);
>  struct comm *comm__new(const char *str, u64 timestamp);
>  const char *comm__str(const struct comm *comm);
> +void comm__override(struct comm *self, const char *str, u64 timestamp);
>  
>  #endif  /* __PERF_COMM_H */
> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> index a2d58e7abdcf..1aa97b012a09 100644
> --- a/tools/perf/util/hist.c
> +++ b/tools/perf/util/hist.c
> @@ -412,6 +412,7 @@ struct hist_entry *__hists__add_mem_entry(struct hists *self,
>  {
>  	struct hist_entry entry = {
>  		.thread	= al->thread,
> +		.comm = curr_comm(al->thread),

So now that we have an accessor for that, it clashes a bit with existing names.

I suggest we rename thread__comm_curr() -> thread__comm_str()
And rename curr_comm() -> thread__comm()

Hmm?

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 8/8] perf tools: Get current comm instead of last one
  2013-10-02 10:01   ` Frederic Weisbecker
@ 2013-10-08  1:56     ` Namhyung Kim
  0 siblings, 0 replies; 91+ messages in thread
From: Namhyung Kim @ 2013-10-08  1:56 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Arnaldo Carvalho de Melo, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds, Jiri Olsa,
	Arnaldo Carvalho de Melo

Hi Frederic,

On Wed, 2 Oct 2013 12:01:51 +0200, Frederic Weisbecker wrote:
> On Thu, Sep 26, 2013 at 05:58:10PM +0900, Namhyung Kim wrote:
>> From: Namhyung Kim <namhyung.kim@lge.com>
>> 
>> At insert time, a hist entry should reference comm at the time
>> otherwise it'll get the last comm anyway.

[SNIP]

>> --- a/tools/perf/util/comm.h
>> +++ b/tools/perf/util/comm.h
>> @@ -16,5 +16,6 @@ struct comm {
>>  void comm__free(struct comm *comm);
>>  struct comm *comm__new(const char *str, u64 timestamp);
>>  const char *comm__str(const struct comm *comm);
>> +void comm__override(struct comm *self, const char *str, u64 timestamp);
>>  
>>  #endif  /* __PERF_COMM_H */
>> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
>> index a2d58e7abdcf..1aa97b012a09 100644
>> --- a/tools/perf/util/hist.c
>> +++ b/tools/perf/util/hist.c
>> @@ -412,6 +412,7 @@ struct hist_entry *__hists__add_mem_entry(struct hists *self,
>>  {
>>  	struct hist_entry entry = {
>>  		.thread	= al->thread,
>> +		.comm = curr_comm(al->thread),
>
> So now that we have an accessor for that, it clashes a bit with existing names.
>
> I suggest we rename thread__comm_curr() -> thread__comm_str()
> And rename curr_comm() -> thread__comm()
>
> Hmm?

Okay, I'll rename and resend.

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 91+ messages in thread

* [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
@ 2013-10-11  5:15 Namhyung Kim
  2013-10-11  5:15 ` [PATCH 1/8] perf callchain: Convert children list to rbtree Namhyung Kim
                   ` (9 more replies)
  0 siblings, 10 replies; 91+ messages in thread
From: Namhyung Kim @ 2013-10-11  5:15 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Peter Zijlstra, Paul Mackerras, Ingo Molnar, Namhyung Kim, LKML,
	Linus Torvalds, Frederic Weisbecker, Jiri Olsa

Hello,

This is a new version of callchain improvement patchset.  Basically
it's almost same as v4 but rebased on current acme/perf/core and some
functions are renamed as Frederic requested.

Now I'm hunting down a bug in 'perf report -s sym' which was found
during the test, but I think it's not related to this change as it can
be reproduced in earlier versions too.

I put this series on 'perf/callchain-v5' branch in my tree

  git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git


Any comments are welcome, thanks.
Namhyung


Frederic Weisbecker (4):
  perf tools: Use an accessor to read thread comm
  perf tools: Add time argument on comm setting
  perf tools: Add new comm infrastructure
  perf tools: Compare hists comm by addresses

Namhyung Kim (4):
  perf callchain: Convert children list to rbtree
  perf ui/progress: Add new helper functions for progress bar
  perf tools: Show progress on histogram collapsing
  perf tools: Get current comm instead of last one

 tools/perf/Makefile                                |   2 +
 tools/perf/builtin-annotate.c                      |   2 +-
 tools/perf/builtin-diff.c                          |   2 +-
 tools/perf/builtin-kmem.c                          |   2 +-
 tools/perf/builtin-lock.c                          |   2 +-
 tools/perf/builtin-report.c                        |  10 +-
 tools/perf/builtin-sched.c                         |  16 +--
 tools/perf/builtin-script.c                        |   6 +-
 tools/perf/builtin-top.c                           |   6 +-
 tools/perf/builtin-trace.c                         |  16 +--
 tools/perf/tests/code-reading.c                    |   2 +-
 tools/perf/tests/hists_link.c                      |   8 +-
 tools/perf/ui/browsers/hists.c                     |  10 +-
 tools/perf/ui/progress.c                           |  18 +++
 tools/perf/ui/progress.h                           |  10 ++
 tools/perf/util/callchain.c                        | 147 ++++++++++++++++-----
 tools/perf/util/callchain.h                        |  11 +-
 tools/perf/util/comm.c                             | 121 +++++++++++++++++
 tools/perf/util/comm.h                             |  21 +++
 tools/perf/util/event.c                            |  32 ++---
 tools/perf/util/hist.c                             |   8 +-
 tools/perf/util/hist.h                             |   3 +-
 tools/perf/util/machine.c                          |  39 +++---
 tools/perf/util/machine.h                          |  21 ++-
 .../perf/util/scripting-engines/trace-event-perl.c |   2 +-
 .../util/scripting-engines/trace-event-python.c    |   4 +-
 tools/perf/util/session.c                          |  26 ++--
 tools/perf/util/sort.c                             |  19 ++-
 tools/perf/util/sort.h                             |   1 +
 tools/perf/util/thread.c                           | 103 +++++++++++----
 tools/perf/util/thread.h                           |  10 +-
 31 files changed, 503 insertions(+), 177 deletions(-)
 create mode 100644 tools/perf/util/comm.c
 create mode 100644 tools/perf/util/comm.h

-- 
1.7.11.7


^ permalink raw reply	[flat|nested] 91+ messages in thread

* [PATCH 1/8] perf callchain: Convert children list to rbtree
  2013-10-11  5:15 [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Namhyung Kim
@ 2013-10-11  5:15 ` Namhyung Kim
  2013-10-23  7:54   ` [tip:perf/core] " tip-bot for Namhyung Kim
  2013-10-11  5:15 ` [PATCH 2/8] perf ui/progress: Add new helper functions for progress bar Namhyung Kim
                   ` (8 subsequent siblings)
  9 siblings, 1 reply; 91+ messages in thread
From: Namhyung Kim @ 2013-10-11  5:15 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Peter Zijlstra, Paul Mackerras, Ingo Molnar, Namhyung Kim, LKML,
	Linus Torvalds, Frederic Weisbecker, Jiri Olsa

From: Namhyung Kim <namhyung.kim@lge.com>

Current collapse stage has a scalability problem which can be
reproduced easily with parallel kernel build.  This is because it
needs to traverse every children of callchain linearly during the
collapse/merge stage.  Convert it to rbtree reduced the overhead
significantly.

On my 400MB perf.data file which recorded with make -j32 kernel build:

  $ time perf --no-pager report --stdio > /dev/null

before:
  real	6m22.073s
  user	6m18.683s
  sys	0m0.706s

after:
  real	0m20.780s
  user	0m19.962s
  sys	0m0.689s

During the perf report the overhead on append_chain_children went down
from 96.69% to 18.16%:

  -  18.16%  perf  perf                [.] append_chain_children
     - append_chain_children
        - 77.48% append_chain_children
           + 69.79% merge_chain_branch
           - 22.96% append_chain_children
              + 67.44% merge_chain_branch
              + 30.15% append_chain_children
              + 2.41% callchain_append
           + 7.25% callchain_append
        + 12.26% callchain_append
        + 10.22% merge_chain_branch
  +  11.58%  perf  perf                [.] dso__find_symbol
  +   8.02%  perf  perf                [.] sort__comm_cmp
  +   5.48%  perf  libc-2.17.so        [.] malloc_consolidate

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-d9tcfow6stbrp4btvgs51y67@git.kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/callchain.c | 147 +++++++++++++++++++++++++++++++++-----------
 tools/perf/util/callchain.h |  11 ++--
 2 files changed, 116 insertions(+), 42 deletions(-)

diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 482f68081cd8..e3970e3eaacf 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -21,12 +21,6 @@
 
 __thread struct callchain_cursor callchain_cursor;
 
-#define chain_for_each_child(child, parent)	\
-	list_for_each_entry(child, &parent->children, siblings)
-
-#define chain_for_each_child_safe(child, next, parent)	\
-	list_for_each_entry_safe(child, next, &parent->children, siblings)
-
 static void
 rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
 		    enum chain_mode mode)
@@ -71,10 +65,16 @@ static void
 __sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node,
 		  u64 min_hit)
 {
+	struct rb_node *n;
 	struct callchain_node *child;
 
-	chain_for_each_child(child, node)
+	n = rb_first(&node->rb_root_in);
+	while (n) {
+		child = rb_entry(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+
 		__sort_chain_flat(rb_root, child, min_hit);
+	}
 
 	if (node->hit && node->hit >= min_hit)
 		rb_insert_callchain(rb_root, node, CHAIN_FLAT);
@@ -94,11 +94,16 @@ sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root,
 static void __sort_chain_graph_abs(struct callchain_node *node,
 				   u64 min_hit)
 {
+	struct rb_node *n;
 	struct callchain_node *child;
 
 	node->rb_root = RB_ROOT;
+	n = rb_first(&node->rb_root_in);
+
+	while (n) {
+		child = rb_entry(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
 
-	chain_for_each_child(child, node) {
 		__sort_chain_graph_abs(child, min_hit);
 		if (callchain_cumul_hits(child) >= min_hit)
 			rb_insert_callchain(&node->rb_root, child,
@@ -117,13 +122,18 @@ sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root,
 static void __sort_chain_graph_rel(struct callchain_node *node,
 				   double min_percent)
 {
+	struct rb_node *n;
 	struct callchain_node *child;
 	u64 min_hit;
 
 	node->rb_root = RB_ROOT;
 	min_hit = ceil(node->children_hit * min_percent);
 
-	chain_for_each_child(child, node) {
+	n = rb_first(&node->rb_root_in);
+	while (n) {
+		child = rb_entry(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+
 		__sort_chain_graph_rel(child, min_percent);
 		if (callchain_cumul_hits(child) >= min_hit)
 			rb_insert_callchain(&node->rb_root, child,
@@ -173,19 +183,26 @@ create_child(struct callchain_node *parent, bool inherit_children)
 		return NULL;
 	}
 	new->parent = parent;
-	INIT_LIST_HEAD(&new->children);
 	INIT_LIST_HEAD(&new->val);
 
 	if (inherit_children) {
-		struct callchain_node *next;
+		struct rb_node *n;
+		struct callchain_node *child;
+
+		new->rb_root_in = parent->rb_root_in;
+		parent->rb_root_in = RB_ROOT;
 
-		list_splice(&parent->children, &new->children);
-		INIT_LIST_HEAD(&parent->children);
+		n = rb_first(&new->rb_root_in);
+		while (n) {
+			child = rb_entry(n, struct callchain_node, rb_node_in);
+			child->parent = new;
+			n = rb_next(n);
+		}
 
-		chain_for_each_child(next, new)
-			next->parent = new;
+		/* make it the first child */
+		rb_link_node(&new->rb_node_in, NULL, &parent->rb_root_in.rb_node);
+		rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
 	}
-	list_add_tail(&new->siblings, &parent->children);
 
 	return new;
 }
@@ -223,7 +240,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
 	}
 }
 
-static void
+static struct callchain_node *
 add_child(struct callchain_node *parent,
 	  struct callchain_cursor *cursor,
 	  u64 period)
@@ -235,6 +252,19 @@ add_child(struct callchain_node *parent,
 
 	new->children_hit = 0;
 	new->hit = period;
+	return new;
+}
+
+static s64 match_chain(struct callchain_cursor_node *node,
+		      struct callchain_list *cnode)
+{
+	struct symbol *sym = node->sym;
+
+	if (cnode->ms.sym && sym &&
+	    callchain_param.key == CCKEY_FUNCTION)
+		return cnode->ms.sym->start - sym->start;
+	else
+		return cnode->ip - node->ip;
 }
 
 /*
@@ -272,9 +302,33 @@ split_add_child(struct callchain_node *parent,
 
 	/* create a new child for the new branch if any */
 	if (idx_total < cursor->nr) {
+		struct callchain_node *first;
+		struct callchain_list *cnode;
+		struct callchain_cursor_node *node;
+		struct rb_node *p, **pp;
+
 		parent->hit = 0;
-		add_child(parent, cursor, period);
 		parent->children_hit += period;
+
+		node = callchain_cursor_current(cursor);
+		new = add_child(parent, cursor, period);
+
+		/*
+		 * This is second child since we moved parent's children
+		 * to new (first) child above.
+		 */
+		p = parent->rb_root_in.rb_node;
+		first = rb_entry(p, struct callchain_node, rb_node_in);
+		cnode = list_first_entry(&first->val, struct callchain_list,
+					 list);
+
+		if (match_chain(node, cnode) < 0)
+			pp = &p->rb_left;
+		else
+			pp = &p->rb_right;
+
+		rb_link_node(&new->rb_node_in, p, pp);
+		rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
 	} else {
 		parent->hit = period;
 	}
@@ -291,16 +345,40 @@ append_chain_children(struct callchain_node *root,
 		      u64 period)
 {
 	struct callchain_node *rnode;
+	struct callchain_cursor_node *node;
+	struct rb_node **p = &root->rb_root_in.rb_node;
+	struct rb_node *parent = NULL;
+
+	node = callchain_cursor_current(cursor);
+	if (!node)
+		return;
 
 	/* lookup in childrens */
-	chain_for_each_child(rnode, root) {
-		unsigned int ret = append_chain(rnode, cursor, period);
+	while (*p) {
+		s64 ret;
+		struct callchain_list *cnode;
 
-		if (!ret)
+		parent = *p;
+		rnode = rb_entry(parent, struct callchain_node, rb_node_in);
+		cnode = list_first_entry(&rnode->val, struct callchain_list,
+					 list);
+
+		/* just check first entry */
+		ret = match_chain(node, cnode);
+		if (ret == 0) {
+			append_chain(rnode, cursor, period);
 			goto inc_children_hit;
+		}
+
+		if (ret < 0)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
 	}
 	/* nothing in children, add to the current node */
-	add_child(root, cursor, period);
+	rnode = add_child(root, cursor, period);
+	rb_link_node(&rnode->rb_node_in, parent, p);
+	rb_insert_color(&rnode->rb_node_in, &root->rb_root_in);
 
 inc_children_hit:
 	root->children_hit += period;
@@ -325,28 +403,20 @@ append_chain(struct callchain_node *root,
 	 */
 	list_for_each_entry(cnode, &root->val, list) {
 		struct callchain_cursor_node *node;
-		struct symbol *sym;
 
 		node = callchain_cursor_current(cursor);
 		if (!node)
 			break;
 
-		sym = node->sym;
-
-		if (cnode->ms.sym && sym &&
-		    callchain_param.key == CCKEY_FUNCTION) {
-			if (cnode->ms.sym->start != sym->start)
-				break;
-		} else if (cnode->ip != node->ip)
+		if (match_chain(node, cnode) != 0)
 			break;
 
-		if (!found)
-			found = true;
+		found = true;
 
 		callchain_cursor_advance(cursor);
 	}
 
-	/* matches not, relay on the parent */
+	/* matches not, relay no the parent */
 	if (!found) {
 		cursor->curr = curr_snap;
 		cursor->pos = start;
@@ -395,8 +465,9 @@ merge_chain_branch(struct callchain_cursor *cursor,
 		   struct callchain_node *dst, struct callchain_node *src)
 {
 	struct callchain_cursor_node **old_last = cursor->last;
-	struct callchain_node *child, *next_child;
+	struct callchain_node *child;
 	struct callchain_list *list, *next_list;
+	struct rb_node *n;
 	int old_pos = cursor->nr;
 	int err = 0;
 
@@ -412,12 +483,16 @@ merge_chain_branch(struct callchain_cursor *cursor,
 		append_chain_children(dst, cursor, src->hit);
 	}
 
-	chain_for_each_child_safe(child, next_child, src) {
+	n = rb_first(&src->rb_root_in);
+	while (n) {
+		child = container_of(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+		rb_erase(&child->rb_node_in, &src->rb_root_in);
+
 		err = merge_chain_branch(cursor, dst, child);
 		if (err)
 			break;
 
-		list_del(&child->siblings);
 		free(child);
 	}
 
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 2b585bc308cf..7bb36022377f 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -21,11 +21,11 @@ enum chain_order {
 
 struct callchain_node {
 	struct callchain_node	*parent;
-	struct list_head	siblings;
-	struct list_head	children;
 	struct list_head	val;
-	struct rb_node		rb_node; /* to sort nodes in an rbtree */
-	struct rb_root		rb_root; /* sorted tree of children */
+	struct rb_node		rb_node_in; /* to insert nodes in an rbtree */
+	struct rb_node		rb_node;    /* to sort nodes in an output tree */
+	struct rb_root		rb_root_in; /* input tree of children */
+	struct rb_root		rb_root;    /* sorted output tree of children */
 	unsigned int		val_nr;
 	u64			hit;
 	u64			children_hit;
@@ -86,13 +86,12 @@ extern __thread struct callchain_cursor callchain_cursor;
 
 static inline void callchain_init(struct callchain_root *root)
 {
-	INIT_LIST_HEAD(&root->node.siblings);
-	INIT_LIST_HEAD(&root->node.children);
 	INIT_LIST_HEAD(&root->node.val);
 
 	root->node.parent = NULL;
 	root->node.hit = 0;
 	root->node.children_hit = 0;
+	root->node.rb_root_in = RB_ROOT;
 	root->max_depth = 0;
 }
 
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [PATCH 2/8] perf ui/progress: Add new helper functions for progress bar
  2013-10-11  5:15 [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Namhyung Kim
  2013-10-11  5:15 ` [PATCH 1/8] perf callchain: Convert children list to rbtree Namhyung Kim
@ 2013-10-11  5:15 ` Namhyung Kim
  2013-10-21 18:13   ` Arnaldo Carvalho de Melo
  2013-10-11  5:15 ` [PATCH 3/8] perf tools: Show progress on histogram collapsing Namhyung Kim
                   ` (7 subsequent siblings)
  9 siblings, 1 reply; 91+ messages in thread
From: Namhyung Kim @ 2013-10-11  5:15 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Peter Zijlstra, Paul Mackerras, Ingo Molnar, Namhyung Kim, LKML,
	Linus Torvalds, Frederic Weisbecker, Jiri Olsa

From: Namhyung Kim <namhyung.kim@lge.com>

Introduce ui_progress_setup() and ui_progress__advance() to separate
out from the session logic.  It'll be used by other places in the
upcoming patch.

Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/ui/progress.c  | 18 ++++++++++++++++++
 tools/perf/ui/progress.h  | 10 ++++++++++
 tools/perf/util/session.c | 24 ++++++++++++------------
 3 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/tools/perf/ui/progress.c b/tools/perf/ui/progress.c
index 3ec695607a4d..b5c22c6b1d93 100644
--- a/tools/perf/ui/progress.c
+++ b/tools/perf/ui/progress.c
@@ -19,6 +19,24 @@ void ui_progress__update(u64 curr, u64 total, const char *title)
 	return progress_fns->update(curr, total, title);
 }
 
+void ui_progress__setup(struct perf_progress *p, u64 total)
+{
+	p->curr = 0;
+	p->next = p->unit = total / 16;
+	p->total = total;
+
+}
+
+void ui_progress__advance(struct perf_progress *p, u64 adv, const char *title)
+{
+	p->curr += adv;
+
+	if (p->curr >= p->next) {
+		p->next += p->unit;
+		ui_progress__update(p->curr, p->total, title);
+	}
+}
+
 void ui_progress__finish(void)
 {
 	if (progress_fns->finish)
diff --git a/tools/perf/ui/progress.h b/tools/perf/ui/progress.h
index 257cc224f9cf..3bd23e825953 100644
--- a/tools/perf/ui/progress.h
+++ b/tools/perf/ui/progress.h
@@ -8,10 +8,20 @@ struct ui_progress {
 	void (*finish)(void);
 };
 
+struct perf_progress {
+	u64 curr;
+	u64 next;
+	u64 unit;
+	u64 total;
+};
+
 extern struct ui_progress *progress_fns;
 
 void ui_progress__init(void);
 
+void ui_progress__setup(struct perf_progress *p, u64 total);
+void ui_progress__advance(struct perf_progress *p, u64 adv, const char *title);
+
 void ui_progress__update(u64 curr, u64 total, const char *title);
 void ui_progress__finish(void);
 
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 6c1d4447c5b4..9f62be8bc167 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -538,13 +538,16 @@ static int flush_sample_queue(struct perf_session *s,
 	struct perf_sample sample;
 	u64 limit = os->next_flush;
 	u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
-	unsigned idx = 0, progress_next = os->nr_samples / 16;
 	bool show_progress = limit == ULLONG_MAX;
+	struct perf_progress prog;
 	int ret;
 
 	if (!tool->ordered_samples || !limit)
 		return 0;
 
+	if (show_progress)
+		ui_progress__setup(&prog, os->nr_samples);
+
 	list_for_each_entry_safe(iter, tmp, head, list) {
 		if (iter->timestamp > limit)
 			break;
@@ -562,10 +565,10 @@ static int flush_sample_queue(struct perf_session *s,
 		os->last_flush = iter->timestamp;
 		list_del(&iter->list);
 		list_add(&iter->list, &os->sample_cache);
-		if (show_progress && (++idx >= progress_next)) {
-			progress_next += os->nr_samples / 16;
-			ui_progress__update(idx, os->nr_samples,
-					    "Processing time ordered events...");
+
+		if (show_progress) {
+			ui_progress__advance(&prog, 1,
+					     "Processing time ordered events...");
 		}
 	}
 
@@ -1310,12 +1313,13 @@ int __perf_session__process_events(struct perf_session *session,
 				   u64 data_offset, u64 data_size,
 				   u64 file_size, struct perf_tool *tool)
 {
-	u64 head, page_offset, file_offset, file_pos, progress_next;
+	u64 head, page_offset, file_offset, file_pos;
 	int err, mmap_prot, mmap_flags, map_idx = 0;
 	size_t	mmap_size;
 	char *buf, *mmaps[NUM_MMAPS];
 	union perf_event *event;
 	uint32_t size;
+	struct perf_progress prog;
 
 	perf_tool__fill_defaults(tool);
 
@@ -1326,7 +1330,7 @@ int __perf_session__process_events(struct perf_session *session,
 	if (data_offset + data_size < file_size)
 		file_size = data_offset + data_size;
 
-	progress_next = file_size / 16;
+	ui_progress__setup(&prog, file_size);
 
 	mmap_size = MMAP_SIZE;
 	if (mmap_size > file_size)
@@ -1381,11 +1385,7 @@ more:
 	head += size;
 	file_pos += size;
 
-	if (file_pos >= progress_next) {
-		progress_next += file_size / 16;
-		ui_progress__update(file_pos, file_size,
-				    "Processing events...");
-	}
+	ui_progress__advance(&prog, size, "Processing events...");
 
 	if (file_pos < file_size)
 		goto more;
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [PATCH 3/8] perf tools: Show progress on histogram collapsing
  2013-10-11  5:15 [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Namhyung Kim
  2013-10-11  5:15 ` [PATCH 1/8] perf callchain: Convert children list to rbtree Namhyung Kim
  2013-10-11  5:15 ` [PATCH 2/8] perf ui/progress: Add new helper functions for progress bar Namhyung Kim
@ 2013-10-11  5:15 ` Namhyung Kim
  2013-10-25 10:33   ` [tip:perf/core] " tip-bot for Namhyung Kim
  2013-10-28 15:03   ` [tip:perf/urgent] perf script python: Fix mem leak due to missing Py_DECREFs on dict entries tip-bot for Joseph Schuchart
  2013-10-11  5:15 ` [PATCH 4/8] perf tools: Use an accessor to read thread comm Namhyung Kim
                   ` (6 subsequent siblings)
  9 siblings, 2 replies; 91+ messages in thread
From: Namhyung Kim @ 2013-10-11  5:15 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Peter Zijlstra, Paul Mackerras, Ingo Molnar, Namhyung Kim, LKML,
	Linus Torvalds, Frederic Weisbecker, Jiri Olsa

From: Namhyung Kim <namhyung.kim@lge.com>

It can take quite amount of time so add progress bar UI to inform user.

Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/builtin-annotate.c |  2 +-
 tools/perf/builtin-diff.c     |  2 +-
 tools/perf/builtin-report.c   | 10 +++++++++-
 tools/perf/builtin-top.c      |  4 ++--
 tools/perf/tests/hists_link.c |  2 +-
 tools/perf/util/hist.c        |  5 ++++-
 tools/perf/util/hist.h        |  3 ++-
 7 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index ddde4074fd91..a732a6caeffe 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -246,7 +246,7 @@ static int __cmd_annotate(struct perf_annotate *ann)
 
 		if (nr_samples > 0) {
 			total_nr_samples += nr_samples;
-			hists__collapse_resort(hists);
+			hists__collapse_resort(hists, NULL);
 			hists__output_resort(hists);
 
 			if (symbol_conf.event_group &&
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index f28799e94f2a..f4ab7d210105 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -367,7 +367,7 @@ static void perf_evlist__collapse_resort(struct perf_evlist *evlist)
 	list_for_each_entry(evsel, &evlist->entries, node) {
 		struct hists *hists = &evsel->hists;
 
-		hists__collapse_resort(hists);
+		hists__collapse_resort(hists, NULL);
 	}
 }
 
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index c902229f3f94..83920612984a 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -489,6 +489,7 @@ static int __cmd_report(struct perf_report *rep)
 	struct map *kernel_map;
 	struct kmap *kernel_kmap;
 	const char *help = "For a higher level overview, try: perf report --sort comm,dso";
+	struct perf_progress prog;
 
 	signal(SIGINT, sig_handler);
 
@@ -550,13 +551,19 @@ static int __cmd_report(struct perf_report *rep)
 	}
 
 	nr_samples = 0;
+	list_for_each_entry(pos, &session->evlist->entries, node)
+		nr_samples += pos->hists.nr_entries;
+
+	ui_progress__setup(&prog, nr_samples);
+
+	nr_samples = 0;
 	list_for_each_entry(pos, &session->evlist->entries, node) {
 		struct hists *hists = &pos->hists;
 
 		if (pos->idx == 0)
 			hists->symbol_filter_str = rep->symbol_filter_str;
 
-		hists__collapse_resort(hists);
+		hists__collapse_resort(hists, &prog);
 		nr_samples += hists->stats.nr_events[PERF_RECORD_SAMPLE];
 
 		/* Non-group events are considered as leader */
@@ -568,6 +575,7 @@ static int __cmd_report(struct perf_report *rep)
 			hists__link(leader_hists, hists);
 		}
 	}
+	ui_progress__finish();
 
 	if (nr_samples == 0) {
 		ui__error("The %s file has no samples!\n", session->filename);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index cf4bba07175a..0cb077f823de 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -287,7 +287,7 @@ static void perf_top__print_sym_table(struct perf_top *top)
 		return;
 	}
 
-	hists__collapse_resort(&top->sym_evsel->hists);
+	hists__collapse_resort(&top->sym_evsel->hists, NULL);
 	hists__output_resort(&top->sym_evsel->hists);
 	hists__decay_entries(&top->sym_evsel->hists,
 			     top->hide_user_symbols,
@@ -553,7 +553,7 @@ static void perf_top__sort_new_samples(void *arg)
 	if (t->evlist->selected != NULL)
 		t->sym_evsel = t->evlist->selected;
 
-	hists__collapse_resort(&t->sym_evsel->hists);
+	hists__collapse_resort(&t->sym_evsel->hists, NULL);
 	hists__output_resort(&t->sym_evsel->hists);
 	hists__decay_entries(&t->sym_evsel->hists,
 			     t->hide_user_symbols,
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 4228ffc0d968..3b5faaa5c8cf 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -465,7 +465,7 @@ int test__hists_link(void)
 		goto out;
 
 	list_for_each_entry(evsel, &evlist->entries, node) {
-		hists__collapse_resort(&evsel->hists);
+		hists__collapse_resort(&evsel->hists, NULL);
 
 		if (verbose > 2)
 			print_hists(&evsel->hists);
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index e935d586d288..8b2d96c56d17 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -395,6 +395,7 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
 	if (!he)
 		return NULL;
 
+	hists->nr_entries++;
 	rb_link_node(&he->rb_node_in, parent, p);
 	rb_insert_color(&he->rb_node_in, hists->entries_in);
 out:
@@ -599,7 +600,7 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he)
 	hists__filter_entry_by_symbol(hists, he);
 }
 
-void hists__collapse_resort(struct hists *hists)
+void hists__collapse_resort(struct hists *hists, struct perf_progress *prog)
 {
 	struct rb_root *root;
 	struct rb_node *next;
@@ -624,6 +625,8 @@ void hists__collapse_resort(struct hists *hists)
 			 */
 			hists__apply_filters(hists, n);
 		}
+		if (prog)
+			ui_progress__advance(prog, 1, "Merging related events...");
 	}
 }
 
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 7f29792efc58..f3d6cf6bd7d4 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -5,6 +5,7 @@
 #include <pthread.h>
 #include "callchain.h"
 #include "header.h"
+#include "ui/progress.h"
 
 extern struct callchain_param callchain_param;
 
@@ -104,7 +105,7 @@ struct hist_entry *__hists__add_mem_entry(struct hists *self,
 					  u64 weight);
 
 void hists__output_resort(struct hists *self);
-void hists__collapse_resort(struct hists *self);
+void hists__collapse_resort(struct hists *self, struct perf_progress *prog);
 
 void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel);
 void hists__output_recalc_col_len(struct hists *hists, int max_rows);
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [PATCH 4/8] perf tools: Use an accessor to read thread comm
  2013-10-11  5:15 [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Namhyung Kim
                   ` (2 preceding siblings ...)
  2013-10-11  5:15 ` [PATCH 3/8] perf tools: Show progress on histogram collapsing Namhyung Kim
@ 2013-10-11  5:15 ` Namhyung Kim
  2013-10-11  5:15 ` [PATCH 5/8] perf tools: Add time argument on comm setting Namhyung Kim
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 91+ messages in thread
From: Namhyung Kim @ 2013-10-11  5:15 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Peter Zijlstra, Paul Mackerras, Ingo Molnar, Namhyung Kim, LKML,
	Linus Torvalds, Frederic Weisbecker, Jiri Olsa, David Ahern,
	Ingo Molnar, Arnaldo Carvalho de Melo, Stephane Eranian

From: Frederic Weisbecker <fweisbec@gmail.com>

As the thread comm is going to be implemented by way of a more
complicated data structure than just a pointer to a string from the
thread struct, convert the readers of comm to use an accessor instead
of accessing it directly. The accessor will be later overriden to
support an enhanced comm implementation.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-wr683zwy94hmj4ibogmnv9ce@git.kernel.org
[ Rename thread__comm_curr() to thread__comm_str() ]
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
[ Fixed up some minor const pointer issues ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-kmem.c                              |  2 +-
 tools/perf/builtin-lock.c                              |  2 +-
 tools/perf/builtin-sched.c                             | 16 ++++++++--------
 tools/perf/builtin-script.c                            |  6 +++---
 tools/perf/builtin-trace.c                             |  4 ++--
 tools/perf/tests/hists_link.c                          |  2 +-
 tools/perf/ui/browsers/hists.c                         | 10 +++++-----
 tools/perf/util/event.c                                |  4 ++--
 tools/perf/util/scripting-engines/trace-event-perl.c   |  2 +-
 tools/perf/util/scripting-engines/trace-event-python.c |  4 ++--
 tools/perf/util/sort.c                                 | 10 +++++-----
 tools/perf/util/thread.c                               |  7 ++++++-
 tools/perf/util/thread.h                               |  1 +
 13 files changed, 38 insertions(+), 32 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 9b5f077fee5b..95fc13892c31 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -314,7 +314,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 		return -1;
 	}
 
-	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->tid);
+	dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
 
 	if (evsel->handler.func != NULL) {
 		tracepoint_handler f = evsel->handler.func;
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 6a9076f165f4..3df6ee921cca 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -766,7 +766,7 @@ static void dump_threads(void)
 	while (node) {
 		st = container_of(node, struct thread_stat, rb);
 		t = perf_session__findnew(session, st->tid);
-		pr_info("%10d: %s\n", st->tid, t->comm);
+		pr_info("%10d: %s\n", st->tid, thread__comm_str(t));
 		node = rb_next(node);
 	};
 }
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index d8c51b2f263f..eab73335ec98 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -737,12 +737,12 @@ static int replay_fork_event(struct perf_sched *sched,
 
 	if (verbose) {
 		printf("fork event\n");
-		printf("... parent: %s/%d\n", parent->comm, parent->tid);
-		printf("...  child: %s/%d\n", child->comm, child->tid);
+		printf("... parent: %s/%d\n", thread__comm_str(parent), parent->tid);
+		printf("...  child: %s/%d\n", thread__comm_str(child), child->tid);
 	}
 
-	register_pid(sched, parent->tid, parent->comm);
-	register_pid(sched, child->tid, child->comm);
+	register_pid(sched, parent->tid, thread__comm_str(parent));
+	register_pid(sched, child->tid, thread__comm_str(child));
 	return 0;
 }
 
@@ -1077,7 +1077,7 @@ static int latency_migrate_task_event(struct perf_sched *sched,
 	if (!atoms) {
 		if (thread_atoms_insert(sched, migrant))
 			return -1;
-		register_pid(sched, migrant->tid, migrant->comm);
+		register_pid(sched, migrant->tid, thread__comm_str(migrant));
 		atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
 		if (!atoms) {
 			pr_err("migration-event: Internal tree error");
@@ -1111,13 +1111,13 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
 	/*
 	 * Ignore idle threads:
 	 */
-	if (!strcmp(work_list->thread->comm, "swapper"))
+	if (!strcmp(thread__comm_str(work_list->thread), "swapper"))
 		return;
 
 	sched->all_runtime += work_list->total_runtime;
 	sched->all_count   += work_list->nb_atoms;
 
-	ret = printf("  %s:%d ", work_list->thread->comm, work_list->thread->tid);
+	ret = printf("  %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid);
 
 	for (i = 0; i < 24 - ret; i++)
 		printf(" ");
@@ -1334,7 +1334,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 	printf("  %12.6f secs ", (double)timestamp/1e9);
 	if (new_shortname) {
 		printf("%s => %s:%d\n",
-			sched_in->shortname, sched_in->comm, sched_in->tid);
+		       sched_in->shortname, thread__comm_str(sched_in), sched_in->tid);
 	} else {
 		printf("\n");
 	}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 7f31a3ded1b6..96f0c2d43513 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -290,11 +290,11 @@ static void print_sample_start(struct perf_sample *sample,
 
 	if (PRINT_FIELD(COMM)) {
 		if (latency_format)
-			printf("%8.8s ", thread->comm);
+			printf("%8.8s ", thread__comm_str(thread));
 		else if (PRINT_FIELD(IP) && symbol_conf.use_callchain)
-			printf("%s ", thread->comm);
+			printf("%s ", thread__comm_str(thread));
 		else
-			printf("%16s ", thread->comm);
+			printf("%16s ", thread__comm_str(thread));
 	}
 
 	if (PRINT_FIELD(PID) && PRINT_FIELD(TID))
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 27c4269b7fe3..d0fdbff3acbd 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1071,7 +1071,7 @@ static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thre
 
 	if (trace->multiple_threads) {
 		if (trace->show_comm)
-			printed += fprintf(fp, "%.14s/", thread->comm);
+			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
 		printed += fprintf(fp, "%d ", thread->tid);
 	}
 
@@ -1810,7 +1810,7 @@ static int trace__fprintf_one_thread(struct thread *thread, void *priv)
 	else if (ratio > 5.0)
 		color = PERF_COLOR_YELLOW;
 
-	printed += color_fprintf(fp, color, "%20s", thread->comm);
+	printed += color_fprintf(fp, color, "%20s", thread__comm_str(thread));
 	printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
 	printed += color_fprintf(fp, color, "%5.1f%%", ratio);
 	printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 3b5faaa5c8cf..3d7977914be6 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -419,7 +419,7 @@ static void print_hists(struct hists *hists)
 		he = rb_entry(node, struct hist_entry, rb_node_in);
 
 		pr_info("%2d: entry: %-8s [%-8s] %20s: period = %"PRIu64"\n",
-			i, he->thread->comm, he->ms.map->dso->short_name,
+			i, thread__comm_str(he->thread), he->ms.map->dso->short_name,
 			he->ms.sym->name, he->stat.period);
 
 		i++;
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 7ef36c360471..a91b6b219412 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1255,7 +1255,7 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size,
 	if (thread)
 		printed += scnprintf(bf + printed, size - printed,
 				    ", Thread: %s(%d)",
-				    (thread->comm_set ? thread->comm : ""),
+				     (thread->comm_set ? thread__comm_str(thread) : ""),
 				    thread->tid);
 	if (dso)
 		printed += scnprintf(bf + printed, size - printed,
@@ -1578,7 +1578,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 		if (thread != NULL &&
 		    asprintf(&options[nr_options], "Zoom %s %s(%d) thread",
 			     (browser->hists->thread_filter ? "out of" : "into"),
-			     (thread->comm_set ? thread->comm : ""),
+			     (thread->comm_set ? thread__comm_str(thread) : ""),
 			     thread->tid) > 0)
 			zoom_thread = nr_options++;
 
@@ -1598,7 +1598,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 			struct symbol *sym;
 
 			if (asprintf(&options[nr_options], "Run scripts for samples of thread [%s]",
-				browser->he_selection->thread->comm) > 0)
+				     thread__comm_str(browser->he_selection->thread)) > 0)
 				scripts_comm = nr_options++;
 
 			sym = browser->he_selection->ms.sym;
@@ -1701,7 +1701,7 @@ zoom_out_thread:
 				sort_thread.elide = false;
 			} else {
 				ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"",
-						   thread->comm_set ? thread->comm : "",
+						   thread->comm_set ? thread__comm_str(thread) : "",
 						   thread->tid);
 				browser->hists->thread_filter = thread;
 				sort_thread.elide = true;
@@ -1717,7 +1717,7 @@ do_scripts:
 			memset(script_opt, 0, 64);
 
 			if (choice == scripts_comm)
-				sprintf(script_opt, " -c %s ", browser->he_selection->thread->comm);
+				sprintf(script_opt, " -c %s ", thread__comm_str(browser->he_selection->thread));
 
 			if (choice == scripts_symbol)
 				sprintf(script_opt, " -S %s ", browser->he_selection->ms.sym->name);
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 9b393e7dca6f..e7d4a99c159e 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -725,10 +725,10 @@ int perf_event__preprocess_sample(const union perf_event *event,
 		return -1;
 
 	if (symbol_conf.comm_list &&
-	    !strlist__has_entry(symbol_conf.comm_list, thread->comm))
+	    !strlist__has_entry(symbol_conf.comm_list, thread__comm_str(thread)))
 		goto out_filtered;
 
-	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->tid);
+	dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
 	/*
 	 * Have we already created the kernel maps for this machine?
 	 *
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index a85e4ae5f3ac..94f74e12ac1f 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -273,7 +273,7 @@ static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused,
 	int cpu = sample->cpu;
 	void *data = sample->raw_data;
 	unsigned long long nsecs = sample->time;
-	char *comm = thread->comm;
+	const char *comm = thread__comm_str(thread);
 
 	dSP;
 
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index cc75a3cef388..b2337a60f027 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -239,7 +239,7 @@ static void python_process_tracepoint(union perf_event *perf_event
 	int cpu = sample->cpu;
 	void *data = sample->raw_data;
 	unsigned long long nsecs = sample->time;
-	char *comm = thread->comm;
+	const char *comm = thread__comm_str(thread);
 
 	t = PyTuple_New(MAX_FIELDS);
 	if (!t)
@@ -378,7 +378,7 @@ static void python_process_general_event(union perf_event *perf_event
 	PyDict_SetItemString(dict, "raw_buf", PyString_FromStringAndSize(
 			(const char *)sample->raw_data, sample->raw_size));
 	PyDict_SetItemString(dict, "comm",
-			PyString_FromString(thread->comm));
+			     PyString_FromString(thread__comm_str(thread)));
 	if (al->map) {
 		PyDict_SetItemString(dict, "dso",
 			PyString_FromString(al->map->dso->name));
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 082480579861..519a3dde7466 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -42,7 +42,7 @@ static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)
 	return n;
 }
 
-static int64_t cmp_null(void *l, void *r)
+static int64_t cmp_null(const void *l, const void *r)
 {
 	if (!l && !r)
 		return 0;
@@ -64,7 +64,7 @@ static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf,
 				       size_t size, unsigned int width)
 {
 	return repsep_snprintf(bf, size, "%*s:%5d", width - 6,
-			      self->thread->comm ?: "", self->thread->tid);
+			       thread__comm_str(self->thread) ?: "", self->thread->tid);
 }
 
 struct sort_entry sort_thread = {
@@ -85,8 +85,8 @@ sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
 static int64_t
 sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
 {
-	char *comm_l = left->thread->comm;
-	char *comm_r = right->thread->comm;
+	const char *comm_l = thread__comm_str(left->thread);
+	const char *comm_r = thread__comm_str(right->thread);
 
 	if (!comm_l || !comm_r)
 		return cmp_null(comm_l, comm_r);
@@ -97,7 +97,7 @@ sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
 static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf,
 				     size_t size, unsigned int width)
 {
-	return repsep_snprintf(bf, size, "%*s", width, self->thread->comm);
+	return repsep_snprintf(bf, size, "%*s", width, thread__comm_str(self->thread));
 }
 
 struct sort_entry sort_comm = {
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index e3d4a550a703..39ad50008e54 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -45,6 +45,11 @@ int thread__set_comm(struct thread *self, const char *comm)
 	return err;
 }
 
+const char *thread__comm_str(const struct thread *thread)
+{
+	return thread->comm;
+}
+
 int thread__comm_len(struct thread *self)
 {
 	if (!self->comm_len) {
@@ -58,7 +63,7 @@ int thread__comm_len(struct thread *self)
 
 size_t thread__fprintf(struct thread *thread, FILE *fp)
 {
-	return fprintf(fp, "Thread %d %s\n", thread->tid, thread->comm) +
+	return fprintf(fp, "Thread %d %s\n", thread->tid, thread__comm_str(thread)) +
 	       map_groups__fprintf(&thread->mg, verbose, fp);
 }
 
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 4ebbb40d46d4..6561ad21d9a7 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -35,6 +35,7 @@ static inline void thread__exited(struct thread *thread)
 
 int thread__set_comm(struct thread *self, const char *comm);
 int thread__comm_len(struct thread *self);
+const char *thread__comm_str(const struct thread *thread);
 void thread__insert_map(struct thread *self, struct map *map);
 int thread__fork(struct thread *self, struct thread *parent);
 size_t thread__fprintf(struct thread *thread, FILE *fp);
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [PATCH 5/8] perf tools: Add time argument on comm setting
  2013-10-11  5:15 [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Namhyung Kim
                   ` (3 preceding siblings ...)
  2013-10-11  5:15 ` [PATCH 4/8] perf tools: Use an accessor to read thread comm Namhyung Kim
@ 2013-10-11  5:15 ` Namhyung Kim
  2013-10-11  5:15 ` [PATCH 6/8] perf tools: Add new comm infrastructure Namhyung Kim
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 91+ messages in thread
From: Namhyung Kim @ 2013-10-11  5:15 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Peter Zijlstra, Paul Mackerras, Ingo Molnar, Namhyung Kim, LKML,
	Linus Torvalds, Frederic Weisbecker, Jiri Olsa, David Ahern,
	Ingo Molnar, Stephane Eranian, Arnaldo Carvalho de Melo

From: Frederic Weisbecker <fweisbec@gmail.com>

This way we can later delimit a lifecycle for the comm and map a hist to
a precise comm:timeslice couple.

Comm and fork events that don't have PERF_SAMPLE_TIME samples can only
send 0 value as a timestamp and thus should overwrite any previous comm
on a given thread because there is no sensible way to keep track of all
the comms lifecycles in a thread without time informations.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-6tyow99vgmmtt9qwr2u2lqd7@git.kernel.org
[ Made it cope with PERF_RECORD_MMAP2 ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-top.c        |  2 +-
 tools/perf/builtin-trace.c      | 12 ++++++------
 tools/perf/tests/code-reading.c |  2 +-
 tools/perf/tests/hists_link.c   |  4 ++--
 tools/perf/util/event.c         | 28 ++++++++++++++--------------
 tools/perf/util/machine.c       | 39 ++++++++++++++++++++++-----------------
 tools/perf/util/machine.h       | 21 ++++++++++++++-------
 tools/perf/util/session.c       |  2 +-
 tools/perf/util/thread.c        |  6 ++++--
 tools/perf/util/thread.h        |  4 ++--
 10 files changed, 67 insertions(+), 53 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 0cb077f823de..bb5364c87c3f 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -856,7 +856,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 						   &sample, machine);
 		} else if (event->header.type < PERF_RECORD_MAX) {
 			hists__inc_nr_events(&evsel->hists, event->header.type);
-			machine__process_event(machine, event);
+			machine__process_event(machine, event, &sample);
 		} else
 			++session->stats.nr_unknown_events;
 	}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index d0fdbff3acbd..120a88a3312f 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1079,7 +1079,7 @@ static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thre
 }
 
 static int trace__process_event(struct trace *trace, struct machine *machine,
-				union perf_event *event)
+				union perf_event *event, struct perf_sample *sample)
 {
 	int ret = 0;
 
@@ -1087,9 +1087,9 @@ static int trace__process_event(struct trace *trace, struct machine *machine,
 	case PERF_RECORD_LOST:
 		color_fprintf(trace->output, PERF_COLOR_RED,
 			      "LOST %" PRIu64 " events!\n", event->lost.lost);
-		ret = machine__process_lost_event(machine, event);
+		ret = machine__process_lost_event(machine, event, sample);
 	default:
-		ret = machine__process_event(machine, event);
+		ret = machine__process_event(machine, event, sample);
 		break;
 	}
 
@@ -1098,11 +1098,11 @@ static int trace__process_event(struct trace *trace, struct machine *machine,
 
 static int trace__tool_process(struct perf_tool *tool,
 			       union perf_event *event,
-			       struct perf_sample *sample __maybe_unused,
+			       struct perf_sample *sample,
 			       struct machine *machine)
 {
 	struct trace *trace = container_of(tool, struct trace, tool);
-	return trace__process_event(trace, machine, event);
+	return trace__process_event(trace, machine, event, sample);
 }
 
 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
@@ -1652,7 +1652,7 @@ again:
 				trace->base_time = sample.time;
 
 			if (type != PERF_RECORD_SAMPLE) {
-				trace__process_event(trace, trace->host, event);
+				trace__process_event(trace, trace->host, event, &sample);
 				continue;
 			}
 
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 6fb781d5586c..38d233a27de6 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -276,7 +276,7 @@ static int process_event(struct machine *machine, struct perf_evlist *evlist,
 		return process_sample_event(machine, evlist, event, state);
 
 	if (event->header.type < PERF_RECORD_MAX)
-		return machine__process_event(machine, event);
+		return machine__process_event(machine, event, NULL);
 
 	return 0;
 }
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 3d7977914be6..00b3d21c1ea5 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -93,7 +93,7 @@ static struct machine *setup_fake_machine(struct machines *machines)
 		if (thread == NULL)
 			goto out;
 
-		thread__set_comm(thread, fake_threads[i].comm);
+		thread__set_comm(thread, fake_threads[i].comm, 0);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) {
@@ -110,7 +110,7 @@ static struct machine *setup_fake_machine(struct machines *machines)
 		strcpy(fake_mmap_event.mmap.filename,
 		       fake_mmap_info[i].filename);
 
-		machine__process_mmap_event(machine, &fake_mmap_event);
+		machine__process_mmap_event(machine, &fake_mmap_event, NULL);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) {
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index e7d4a99c159e..24d0d3be1ec3 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -516,18 +516,18 @@ size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp)
 
 int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
-			     struct perf_sample *sample __maybe_unused,
+			     struct perf_sample *sample,
 			     struct machine *machine)
 {
-	return machine__process_comm_event(machine, event);
+	return machine__process_comm_event(machine, event, sample);
 }
 
 int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
-			     struct perf_sample *sample __maybe_unused,
+			     struct perf_sample *sample,
 			     struct machine *machine)
 {
-	return machine__process_lost_event(machine, event);
+	return machine__process_lost_event(machine, event, sample);
 }
 
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
@@ -550,18 +550,18 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp)
 
 int perf_event__process_mmap(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
-			     struct perf_sample *sample __maybe_unused,
+			     struct perf_sample *sample,
 			     struct machine *machine)
 {
-	return machine__process_mmap_event(machine, event);
+	return machine__process_mmap_event(machine, event, sample);
 }
 
 int perf_event__process_mmap2(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
-			     struct perf_sample *sample __maybe_unused,
+			     struct perf_sample *sample,
 			     struct machine *machine)
 {
-	return machine__process_mmap2_event(machine, event);
+	return machine__process_mmap2_event(machine, event, sample);
 }
 
 size_t perf_event__fprintf_task(union perf_event *event, FILE *fp)
@@ -573,18 +573,18 @@ size_t perf_event__fprintf_task(union perf_event *event, FILE *fp)
 
 int perf_event__process_fork(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
-			     struct perf_sample *sample __maybe_unused,
+			     struct perf_sample *sample,
 			     struct machine *machine)
 {
-	return machine__process_fork_event(machine, event);
+	return machine__process_fork_event(machine, event, sample);
 }
 
 int perf_event__process_exit(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
-			     struct perf_sample *sample __maybe_unused,
+			     struct perf_sample *sample,
 			     struct machine *machine)
 {
-	return machine__process_exit_event(machine, event);
+	return machine__process_exit_event(machine, event, sample);
 }
 
 size_t perf_event__fprintf(union perf_event *event, FILE *fp)
@@ -615,10 +615,10 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
 
 int perf_event__process(struct perf_tool *tool __maybe_unused,
 			union perf_event *event,
-			struct perf_sample *sample __maybe_unused,
+			struct perf_sample *sample,
 			struct machine *machine)
 {
-	return machine__process_event(machine, event);
+	return machine__process_event(machine, event, sample);
 }
 
 void thread__find_addr_map(struct thread *self,
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index c616402d1bea..9feac7e92767 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -40,7 +40,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
 			return -ENOMEM;
 
 		snprintf(comm, sizeof(comm), "[guest/%d]", pid);
-		thread__set_comm(thread, comm);
+		thread__set_comm(thread, comm, 0);
 	}
 
 	return 0;
@@ -331,7 +331,8 @@ struct thread *machine__find_thread(struct machine *machine, pid_t tid)
 	return __machine__findnew_thread(machine, 0, tid, false);
 }
 
-int machine__process_comm_event(struct machine *machine, union perf_event *event)
+int machine__process_comm_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample)
 {
 	struct thread *thread = machine__findnew_thread(machine,
 							event->comm.pid,
@@ -340,7 +341,7 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event
 	if (dump_trace)
 		perf_event__fprintf_comm(event, stdout);
 
-	if (thread == NULL || thread__set_comm(thread, event->comm.comm)) {
+	if (thread == NULL || thread__set_comm(thread, event->comm.comm, sample->time)) {
 		dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
 		return -1;
 	}
@@ -349,7 +350,7 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event
 }
 
 int machine__process_lost_event(struct machine *machine __maybe_unused,
-				union perf_event *event)
+				union perf_event *event, struct perf_sample *sample __maybe_unused)
 {
 	dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n",
 		    event->lost.id, event->lost.lost);
@@ -984,7 +985,8 @@ out_problem:
 }
 
 int machine__process_mmap2_event(struct machine *machine,
-				 union perf_event *event)
+				 union perf_event *event,
+				 struct perf_sample *sample __maybe_unused)
 {
 	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 	struct thread *thread;
@@ -1031,7 +1033,8 @@ out_problem:
 	return 0;
 }
 
-int machine__process_mmap_event(struct machine *machine, union perf_event *event)
+int machine__process_mmap_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample __maybe_unused)
 {
 	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 	struct thread *thread;
@@ -1088,7 +1091,8 @@ static void machine__remove_thread(struct machine *machine, struct thread *th)
 	list_add_tail(&th->node, &machine->dead_threads);
 }
 
-int machine__process_fork_event(struct machine *machine, union perf_event *event)
+int machine__process_fork_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample)
 {
 	struct thread *thread = machine__find_thread(machine, event->fork.tid);
 	struct thread *parent = machine__findnew_thread(machine,
@@ -1105,7 +1109,7 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
 		perf_event__fprintf_task(event, stdout);
 
 	if (thread == NULL || parent == NULL ||
-	    thread__fork(thread, parent) < 0) {
+	    thread__fork(thread, parent, sample->time) < 0) {
 		dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
 		return -1;
 	}
@@ -1113,8 +1117,8 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
 	return 0;
 }
 
-int machine__process_exit_event(struct machine *machine __maybe_unused,
-				union perf_event *event)
+int machine__process_exit_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample __maybe_unused)
 {
 	struct thread *thread = machine__find_thread(machine, event->fork.tid);
 
@@ -1127,23 +1131,24 @@ int machine__process_exit_event(struct machine *machine __maybe_unused,
 	return 0;
 }
 
-int machine__process_event(struct machine *machine, union perf_event *event)
+int machine__process_event(struct machine *machine, union perf_event *event,
+			   struct perf_sample *sample)
 {
 	int ret;
 
 	switch (event->header.type) {
 	case PERF_RECORD_COMM:
-		ret = machine__process_comm_event(machine, event); break;
+		ret = machine__process_comm_event(machine, event, sample); break;
 	case PERF_RECORD_MMAP:
-		ret = machine__process_mmap_event(machine, event); break;
+		ret = machine__process_mmap_event(machine, event, sample); break;
 	case PERF_RECORD_MMAP2:
-		ret = machine__process_mmap2_event(machine, event); break;
+		ret = machine__process_mmap2_event(machine, event, sample); break;
 	case PERF_RECORD_FORK:
-		ret = machine__process_fork_event(machine, event); break;
+		ret = machine__process_fork_event(machine, event, sample); break;
 	case PERF_RECORD_EXIT:
-		ret = machine__process_exit_event(machine, event); break;
+		ret = machine__process_exit_event(machine, event, sample); break;
 	case PERF_RECORD_LOST:
-		ret = machine__process_lost_event(machine, event); break;
+		ret = machine__process_lost_event(machine, event, sample); break;
 	default:
 		ret = -1;
 		break;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index d44c09bdc45e..9b7f03bbc535 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -40,13 +40,20 @@ struct map *machine__kernel_map(struct machine *machine, enum map_type type)
 
 struct thread *machine__find_thread(struct machine *machine, pid_t tid);
 
-int machine__process_comm_event(struct machine *machine, union perf_event *event);
-int machine__process_exit_event(struct machine *machine, union perf_event *event);
-int machine__process_fork_event(struct machine *machine, union perf_event *event);
-int machine__process_lost_event(struct machine *machine, union perf_event *event);
-int machine__process_mmap_event(struct machine *machine, union perf_event *event);
-int machine__process_mmap2_event(struct machine *machine, union perf_event *event);
-int machine__process_event(struct machine *machine, union perf_event *event);
+int machine__process_comm_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+int machine__process_exit_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+int machine__process_fork_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+int machine__process_lost_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+int machine__process_mmap_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
+				 struct perf_sample *sample);
+int machine__process_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
 
 typedef void (*machine__process_t)(struct machine *machine, void *data);
 
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 9f62be8bc167..8de3ab388187 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1130,7 +1130,7 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se
 {
 	struct thread *thread = perf_session__findnew(self, 0);
 
-	if (thread == NULL || thread__set_comm(thread, "swapper")) {
+	if (thread == NULL || thread__set_comm(thread, "swapper", 0)) {
 		pr_err("problem inserting idle task.\n");
 		thread = NULL;
 	}
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 39ad50008e54..205b6368f175 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -31,7 +31,8 @@ void thread__delete(struct thread *self)
 	free(self);
 }
 
-int thread__set_comm(struct thread *self, const char *comm)
+int thread__set_comm(struct thread *self, const char *comm,
+		     u64 timestamp __maybe_unused)
 {
 	int err;
 
@@ -73,7 +74,8 @@ void thread__insert_map(struct thread *self, struct map *map)
 	map_groups__insert(&self->mg, map);
 }
 
-int thread__fork(struct thread *self, struct thread *parent)
+int thread__fork(struct thread *self, struct thread *parent,
+		 u64 timestamp __maybe_unused)
 {
 	int i;
 
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 6561ad21d9a7..b7cdf64c2304 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -33,11 +33,11 @@ static inline void thread__exited(struct thread *thread)
 	thread->dead = true;
 }
 
-int thread__set_comm(struct thread *self, const char *comm);
+int thread__set_comm(struct thread *self, const char *comm, u64 timestamp);
 int thread__comm_len(struct thread *self);
 const char *thread__comm_str(const struct thread *thread);
 void thread__insert_map(struct thread *self, struct map *map);
-int thread__fork(struct thread *self, struct thread *parent);
+int thread__fork(struct thread *self, struct thread *parent, u64 timestamp);
 size_t thread__fprintf(struct thread *thread, FILE *fp);
 
 static inline struct map *thread__find_map(struct thread *self,
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [PATCH 6/8] perf tools: Add new comm infrastructure
  2013-10-11  5:15 [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Namhyung Kim
                   ` (4 preceding siblings ...)
  2013-10-11  5:15 ` [PATCH 5/8] perf tools: Add time argument on comm setting Namhyung Kim
@ 2013-10-11  5:15 ` Namhyung Kim
  2013-10-25 10:56   ` Frederic Weisbecker
  2013-10-11  5:15 ` [PATCH 7/8] perf tools: Compare hists comm by addresses Namhyung Kim
                   ` (3 subsequent siblings)
  9 siblings, 1 reply; 91+ messages in thread
From: Namhyung Kim @ 2013-10-11  5:15 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Peter Zijlstra, Paul Mackerras, Ingo Molnar, Namhyung Kim, LKML,
	Linus Torvalds, Frederic Weisbecker, Jiri Olsa, David Ahern,
	Ingo Molnar, Arnaldo Carvalho de Melo, Stephane Eranian

From: Frederic Weisbecker <fweisbec@gmail.com>

This new comm infrastructure provides two features:

1) It keeps track of all comms lifecycle for a given thread. This
way we can associate a timeframe to any thread comm, as long as
PERF_SAMPLE_TIME samples are joined to comm and fork events.

As a result we should have more precise comm sorted hists with
seperated entries for pre and post exec time after a fork.

2) It also makes sure that a given comm string is not duplicated
but rather shared among the threads that refer to it. This way the
threads comm can be compared against pointer values from the sort
infrastructure.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-hwjf70b2wve9m2kosxiq8bb3@git.kernel.org
[ Rename some accessor functions ]
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
[ Fixed up minor const pointer issues and removed 'self' usage ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile      |   2 +
 tools/perf/util/comm.c   | 106 +++++++++++++++++++++++++++++++++++++++++++++++
 tools/perf/util/comm.h   |  20 +++++++++
 tools/perf/util/thread.c | 100 +++++++++++++++++++++++++++++++-------------
 tools/perf/util/thread.h |   3 +-
 5 files changed, 202 insertions(+), 29 deletions(-)
 create mode 100644 tools/perf/util/comm.c
 create mode 100644 tools/perf/util/comm.h

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index b62e12d2ba6b..42001f387e7f 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -272,6 +272,7 @@ LIB_H += util/color.h
 LIB_H += util/values.h
 LIB_H += util/sort.h
 LIB_H += util/hist.h
+LIB_H += util/comm.h
 LIB_H += util/thread.h
 LIB_H += util/thread_map.h
 LIB_H += util/trace-event.h
@@ -339,6 +340,7 @@ LIB_OBJS += $(OUTPUT)util/machine.o
 LIB_OBJS += $(OUTPUT)util/map.o
 LIB_OBJS += $(OUTPUT)util/pstack.o
 LIB_OBJS += $(OUTPUT)util/session.o
+LIB_OBJS += $(OUTPUT)util/comm.o
 LIB_OBJS += $(OUTPUT)util/thread.o
 LIB_OBJS += $(OUTPUT)util/thread_map.o
 LIB_OBJS += $(OUTPUT)util/trace-event-parse.o
diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
new file mode 100644
index 000000000000..0cfb55202537
--- /dev/null
+++ b/tools/perf/util/comm.c
@@ -0,0 +1,106 @@
+#include "comm.h"
+#include "util.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+struct comm_str {
+	char *str;
+	struct rb_node rb_node;
+	int ref;
+};
+
+/* Should perhaps be moved to struct machine */
+static struct rb_root comm_str_root;
+
+static void comm_str_get(struct comm_str *cs)
+{
+	cs->ref++;
+}
+
+static void comm_str_put(struct comm_str *cs)
+{
+	if (!--cs->ref) {
+		rb_erase(&cs->rb_node, &comm_str_root);
+		free(cs->str);
+		free(cs);
+	}
+}
+
+static struct comm_str *comm_str_alloc(const char *str)
+{
+	struct comm_str *cs;
+
+	cs = zalloc(sizeof(*cs));
+	if (!cs)
+		return NULL;
+
+	cs->str = strdup(str);
+	if (!cs->str) {
+		free(cs);
+		return NULL;
+	}
+
+	return cs;
+}
+
+static struct comm_str *comm_str_findnew(const char *str, struct rb_root *root)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct comm_str *iter, *new;
+	int cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct comm_str, rb_node);
+
+		cmp = strcmp(str, iter->str);
+		if (!cmp)
+			return iter;
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	new = comm_str_alloc(str);
+	if (!new)
+		return NULL;
+
+	rb_link_node(&new->rb_node, parent, p);
+	rb_insert_color(&new->rb_node, root);
+
+	return new;
+}
+
+struct comm *comm__new(const char *str, u64 timestamp)
+{
+	struct comm *comm = zalloc(sizeof(*comm));
+
+	if (!comm)
+		return NULL;
+
+	comm->start = timestamp;
+
+	comm->comm_str = comm_str_findnew(str, &comm_str_root);
+	if (!comm->comm_str) {
+		free(comm);
+		return NULL;
+	}
+
+	comm_str_get(comm->comm_str);
+
+	return comm;
+}
+
+void comm__free(struct comm *comm)
+{
+	comm_str_put(comm->comm_str);
+	free(comm);
+}
+
+const char *comm__str(const struct comm *comm)
+{
+	return comm->comm_str->str;
+}
diff --git a/tools/perf/util/comm.h b/tools/perf/util/comm.h
new file mode 100644
index 000000000000..f62d215bede2
--- /dev/null
+++ b/tools/perf/util/comm.h
@@ -0,0 +1,20 @@
+#ifndef __PERF_COMM_H
+#define __PERF_COMM_H
+
+#include "../perf.h"
+#include <linux/rbtree.h>
+#include <linux/list.h>
+
+struct comm_str;
+
+struct comm {
+	struct comm_str *comm_str;
+	u64 start;
+	struct list_head list;
+};
+
+void comm__free(struct comm *comm);
+struct comm *comm__new(const char *str, u64 timestamp);
+const char *comm__str(const struct comm *comm);
+
+#endif  /* __PERF_COMM_H */
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 205b6368f175..095b725d0cd4 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -6,9 +6,12 @@
 #include "thread.h"
 #include "util.h"
 #include "debug.h"
+#include "comm.h"
 
 struct thread *thread__new(pid_t pid, pid_t tid)
 {
+	char *comm_str;
+	struct comm *comm;
 	struct thread *self = zalloc(sizeof(*self));
 
 	if (self != NULL) {
@@ -16,47 +19,88 @@ struct thread *thread__new(pid_t pid, pid_t tid)
 		self->pid_ = pid;
 		self->tid = tid;
 		self->ppid = -1;
-		self->comm = malloc(32);
-		if (self->comm)
-			snprintf(self->comm, 32, ":%d", self->tid);
+		INIT_LIST_HEAD(&self->comm_list);
+
+		comm_str = malloc(32);
+		if (!comm_str)
+			goto err_thread;
+
+		snprintf(comm_str, 32, ":%d", tid);
+		comm = comm__new(comm_str, 0);
+		free(comm_str);
+		if (!comm)
+			goto err_thread;
+
+		list_add(&comm->list, &self->comm_list);
 	}
 
 	return self;
+
+err_thread:
+	free(self);
+	return NULL;
 }
 
 void thread__delete(struct thread *self)
 {
+	struct comm *comm, *tmp;
+
 	map_groups__exit(&self->mg);
-	free(self->comm);
+	list_for_each_entry_safe(comm, tmp, &self->comm_list, list) {
+		list_del(&comm->list);
+		comm__free(comm);
+	}
+
 	free(self);
 }
 
-int thread__set_comm(struct thread *self, const char *comm,
-		     u64 timestamp __maybe_unused)
+static struct comm *thread__comm(const struct thread *thread)
 {
-	int err;
-
-	if (self->comm)
-		free(self->comm);
-	self->comm = strdup(comm);
-	err = self->comm == NULL ? -ENOMEM : 0;
-	if (!err) {
-		self->comm_set = true;
+	if (list_empty(&thread->comm_list))
+		return NULL;
+
+	return list_first_entry(&thread->comm_list, struct comm, list);
+}
+
+/* CHECKME: time should always be 0 if event aren't ordered */
+int thread__set_comm(struct thread *thread, const char *str, u64 timestamp)
+{
+	struct comm *new, *curr = thread__comm(thread);
+
+	/* Override latest entry if it had no specific time coverage */
+	if (!curr->start) {
+		list_del(&curr->list);
+		comm__free(curr);
 	}
-	return err;
+
+	new = comm__new(str, timestamp);
+	if (!new)
+		return -ENOMEM;
+
+	list_add(&new->list, &thread->comm_list);
+	thread->comm_set = true;
+
+	return 0;
 }
 
 const char *thread__comm_str(const struct thread *thread)
 {
-	return thread->comm;
+	const struct comm *comm = thread__comm(thread);
+
+	if (!comm)
+		return NULL;
+
+	return comm__str(comm);
 }
 
+/* CHECKME: it should probably better return the max comm len from its comm list */
 int thread__comm_len(struct thread *self)
 {
 	if (!self->comm_len) {
-		if (!self->comm)
+		const char *comm = thread__comm_str(self);
+		if (!comm)
 			return 0;
-		self->comm_len = strlen(self->comm);
+		self->comm_len = strlen(comm);
 	}
 
 	return self->comm_len;
@@ -74,25 +118,25 @@ void thread__insert_map(struct thread *self, struct map *map)
 	map_groups__insert(&self->mg, map);
 }
 
-int thread__fork(struct thread *self, struct thread *parent,
-		 u64 timestamp __maybe_unused)
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
 {
-	int i;
+	int i, err;
 
 	if (parent->comm_set) {
-		if (self->comm)
-			free(self->comm);
-		self->comm = strdup(parent->comm);
-		if (!self->comm)
+		const char *comm = thread__comm_str(parent);
+		if (!comm)
 			return -ENOMEM;
-		self->comm_set = true;
+		err = thread__set_comm(thread, comm, timestamp);
+		if (!err)
+			return err;
+		thread->comm_set = true;
 	}
 
 	for (i = 0; i < MAP__NR_TYPES; ++i)
-		if (map_groups__clone(&self->mg, &parent->mg, i) < 0)
+		if (map_groups__clone(&thread->mg, &parent->mg, i) < 0)
 			return -ENOMEM;
 
-	self->ppid = parent->tid;
+	thread->ppid = parent->tid;
 
 	return 0;
 }
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index b7cdf64c2304..307d2ec60123 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -2,6 +2,7 @@
 #define __PERF_THREAD_H
 
 #include <linux/rbtree.h>
+#include <linux/list.h>
 #include <unistd.h>
 #include <sys/types.h>
 #include "symbol.h"
@@ -18,7 +19,7 @@ struct thread {
 	char			shortname[3];
 	bool			comm_set;
 	bool			dead; /* if set thread has exited */
-	char			*comm;
+	struct list_head	comm_list;
 	int			comm_len;
 
 	void			*priv;
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [PATCH 7/8] perf tools: Compare hists comm by addresses
  2013-10-11  5:15 [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Namhyung Kim
                   ` (5 preceding siblings ...)
  2013-10-11  5:15 ` [PATCH 6/8] perf tools: Add new comm infrastructure Namhyung Kim
@ 2013-10-11  5:15 ` Namhyung Kim
  2013-11-04 20:19   ` [tip:perf/core] " tip-bot for Frederic Weisbecker
  2013-10-11  5:15 ` [PATCH 8/8] perf tools: Get current comm instead of last one Namhyung Kim
                   ` (2 subsequent siblings)
  9 siblings, 1 reply; 91+ messages in thread
From: Namhyung Kim @ 2013-10-11  5:15 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Peter Zijlstra, Paul Mackerras, Ingo Molnar, Namhyung Kim, LKML,
	Linus Torvalds, Frederic Weisbecker, Jiri Olsa, David Ahern,
	Ingo Molnar, Arnaldo Carvalho de Melo, Stephane Eranian

From: Frederic Weisbecker <fweisbec@gmail.com>

Now that comm strings are allocated only once and refcounted to be shared
among threads, these can now be safely compared by addresses. This
should remove most hists collapses on post processing.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/sort.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 519a3dde7466..e68833f3932d 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -79,7 +79,8 @@ struct sort_entry sort_thread = {
 static int64_t
 sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-	return right->thread->tid - left->thread->tid;
+	/* Compare the addr that should be unique among comm */
+	return thread__comm_str(right->thread) - thread__comm_str(left->thread);
 }
 
 static int64_t
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [PATCH 8/8] perf tools: Get current comm instead of last one
  2013-10-11  5:15 [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Namhyung Kim
                   ` (6 preceding siblings ...)
  2013-10-11  5:15 ` [PATCH 7/8] perf tools: Compare hists comm by addresses Namhyung Kim
@ 2013-10-11  5:15 ` Namhyung Kim
  2013-10-11  5:58 ` [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Ingo Molnar
  2013-10-13 12:34 ` [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Jiri Olsa
  9 siblings, 0 replies; 91+ messages in thread
From: Namhyung Kim @ 2013-10-11  5:15 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Peter Zijlstra, Paul Mackerras, Ingo Molnar, Namhyung Kim, LKML,
	Linus Torvalds, Frederic Weisbecker, Jiri Olsa,
	Arnaldo Carvalho de Melo

From: Namhyung Kim <namhyung.kim@lge.com>

At insert time, a hist entry should reference comm at the time
otherwise it'll get the last comm anyway.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-n6pykiiymtgmcjs834go2t8x@git.kernel.org
[ Fixed up const pointer issues ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/comm.c   | 15 +++++++++++++++
 tools/perf/util/comm.h   |  1 +
 tools/perf/util/hist.c   |  3 +++
 tools/perf/util/sort.c   | 14 +++++---------
 tools/perf/util/sort.h   |  1 +
 tools/perf/util/thread.c |  6 +++---
 tools/perf/util/thread.h |  2 ++
 7 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
index 0cfb55202537..ecd3c9938d3e 100644
--- a/tools/perf/util/comm.c
+++ b/tools/perf/util/comm.c
@@ -94,6 +94,21 @@ struct comm *comm__new(const char *str, u64 timestamp)
 	return comm;
 }
 
+void comm__override(struct comm *comm, const char *str, u64 timestamp)
+{
+	struct comm_str *old = comm->comm_str;
+
+	comm->comm_str = comm_str_findnew(str, &comm_str_root);
+	if (!comm->comm_str) {
+		comm->comm_str = old;
+		return;
+	}
+
+	comm->start = timestamp;
+	comm_str_get(comm->comm_str);
+	comm_str_put(old);
+}
+
 void comm__free(struct comm *comm)
 {
 	comm_str_put(comm->comm_str);
diff --git a/tools/perf/util/comm.h b/tools/perf/util/comm.h
index f62d215bede2..b2c364b37468 100644
--- a/tools/perf/util/comm.h
+++ b/tools/perf/util/comm.h
@@ -16,5 +16,6 @@ struct comm {
 void comm__free(struct comm *comm);
 struct comm *comm__new(const char *str, u64 timestamp);
 const char *comm__str(const struct comm *comm);
+void comm__override(struct comm *self, const char *str, u64 timestamp);
 
 #endif  /* __PERF_COMM_H */
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 8b2d96c56d17..e86d18a674b2 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -412,6 +412,7 @@ struct hist_entry *__hists__add_mem_entry(struct hists *self,
 {
 	struct hist_entry entry = {
 		.thread	= al->thread,
+		.comm = thread__comm(al->thread),
 		.ms = {
 			.map	= al->map,
 			.sym	= al->sym,
@@ -442,6 +443,7 @@ struct hist_entry *__hists__add_branch_entry(struct hists *self,
 {
 	struct hist_entry entry = {
 		.thread	= al->thread,
+		.comm = thread__comm(al->thread),
 		.ms = {
 			.map	= bi->to.map,
 			.sym	= bi->to.sym,
@@ -471,6 +473,7 @@ struct hist_entry *__hists__add_entry(struct hists *self,
 {
 	struct hist_entry entry = {
 		.thread	= al->thread,
+		.comm = thread__comm(al->thread),
 		.ms = {
 			.map	= al->map,
 			.sym	= al->sym,
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index e68833f3932d..b4cf0cf077ca 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1,5 +1,6 @@
 #include "sort.h"
 #include "hist.h"
+#include "comm.h"
 #include "symbol.h"
 
 regex_t		parent_regex;
@@ -80,25 +81,20 @@ static int64_t
 sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
 {
 	/* Compare the addr that should be unique among comm */
-	return thread__comm_str(right->thread) - thread__comm_str(left->thread);
+	return comm__str(right->comm) - comm__str(left->comm);
 }
 
 static int64_t
 sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
 {
-	const char *comm_l = thread__comm_str(left->thread);
-	const char *comm_r = thread__comm_str(right->thread);
-
-	if (!comm_l || !comm_r)
-		return cmp_null(comm_l, comm_r);
-
-	return strcmp(comm_l, comm_r);
+	/* Compare the addr that should be unique among comm */
+	return comm__str(right->comm) - comm__str(left->comm);
 }
 
 static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf,
 				     size_t size, unsigned int width)
 {
-	return repsep_snprintf(bf, size, "%*s", width, thread__comm_str(self->thread));
+	return repsep_snprintf(bf, size, "%*s", width, comm__str(self->comm));
 }
 
 struct sort_entry sort_comm = {
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 4e80dbd271e7..4d0153f83e3c 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -84,6 +84,7 @@ struct hist_entry {
 	struct he_stat		stat;
 	struct map_symbol	ms;
 	struct thread		*thread;
+	struct comm		*comm;
 	u64			ip;
 	s32			cpu;
 
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 095b725d0cd4..cddbde8672b2 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -54,7 +54,7 @@ void thread__delete(struct thread *self)
 	free(self);
 }
 
-static struct comm *thread__comm(const struct thread *thread)
+struct comm *thread__comm(const struct thread *thread)
 {
 	if (list_empty(&thread->comm_list))
 		return NULL;
@@ -69,8 +69,8 @@ int thread__set_comm(struct thread *thread, const char *str, u64 timestamp)
 
 	/* Override latest entry if it had no specific time coverage */
 	if (!curr->start) {
-		list_del(&curr->list);
-		comm__free(curr);
+		comm__override(curr, str, timestamp);
+		return 0;
 	}
 
 	new = comm__new(str, timestamp);
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 307d2ec60123..eaf3e46148b5 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -26,6 +26,7 @@ struct thread {
 };
 
 struct machine;
+struct comm;
 
 struct thread *thread__new(pid_t pid, pid_t tid);
 void thread__delete(struct thread *self);
@@ -36,6 +37,7 @@ static inline void thread__exited(struct thread *thread)
 
 int thread__set_comm(struct thread *self, const char *comm, u64 timestamp);
 int thread__comm_len(struct thread *self);
+struct comm *thread__comm(const struct thread *self);
 const char *thread__comm_str(const struct thread *thread);
 void thread__insert_map(struct thread *self, struct map *map);
 int thread__fork(struct thread *self, struct thread *parent, u64 timestamp);
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 91+ messages in thread

* Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-11  5:15 [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Namhyung Kim
                   ` (7 preceding siblings ...)
  2013-10-11  5:15 ` [PATCH 8/8] perf tools: Get current comm instead of last one Namhyung Kim
@ 2013-10-11  5:58 ` Ingo Molnar
  2013-10-11  7:34   ` Jiri Olsa
  2013-10-13 12:34 ` [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Jiri Olsa
  9 siblings, 1 reply; 91+ messages in thread
From: Ingo Molnar @ 2013-10-11  5:58 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, Peter Zijlstra, Paul Mackerras,
	Namhyung Kim, LKML, Linus Torvalds, Frederic Weisbecker,
	Jiri Olsa


* Namhyung Kim <namhyung@kernel.org> wrote:

> Hello,
> 
> This is a new version of callchain improvement patchset.  Basically
> it's almost same as v4 but rebased on current acme/perf/core and some
> functions are renamed as Frederic requested.
> 
> Now I'm hunting down a bug in 'perf report -s sym' which was found
> during the test, but I think it's not related to this change as it can
> be reproduced in earlier versions too.
> 
> I put this series on 'perf/callchain-v5' branch in my tree
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git
> 
> Any comments are welcome, thanks.

One thing I noticed about call-graph profiling:

  comet:~/tip/tools/perf> ./perf record -g ~/hackbench 10
  callchain: Unknown -g option value: /home/mingo/hackbench

I think a naked -g used to work just fine in the past. Even if such an 
error is displayed the output is very unhelpful, it does does a full 
perf-record options dump (unnecessary), the bit that is helpful is hidden 
amongst many other options:

    -g, --call-graph <mode[,dump_size]>
                          do call-graph (stack chain/backtrace) recording: [fp] dwarf

and it took me two reads to see that I should specify 'fp'. The '[fp]' 
indicates that fp is the default - but that does not appear to be working.

Another thing I noticed is that there are a fair number of conflicts 
against perf/core, and some more against Arnaldo's tree.

Otherwise it seems to be working well!

Thanks,

	Ingo


^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-11  5:58 ` [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Ingo Molnar
@ 2013-10-11  7:34   ` Jiri Olsa
  2013-10-11  8:24     ` Namhyung Kim
  2013-10-11 15:11     ` David Ahern
  0 siblings, 2 replies; 91+ messages in thread
From: Jiri Olsa @ 2013-10-11  7:34 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Namhyung Kim, Arnaldo Carvalho de Melo, Peter Zijlstra,
	Paul Mackerras, Namhyung Kim, LKML, Linus Torvalds,
	Frederic Weisbecker

On Fri, Oct 11, 2013 at 07:58:29AM +0200, Ingo Molnar wrote:
> 
> * Namhyung Kim <namhyung@kernel.org> wrote:
> 
> > Hello,
> > 
> > This is a new version of callchain improvement patchset.  Basically
> > it's almost same as v4 but rebased on current acme/perf/core and some
> > functions are renamed as Frederic requested.
> > 
> > Now I'm hunting down a bug in 'perf report -s sym' which was found
> > during the test, but I think it's not related to this change as it can
> > be reproduced in earlier versions too.
> > 
> > I put this series on 'perf/callchain-v5' branch in my tree
> > 
> >   git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git
> > 
> > Any comments are welcome, thanks.
> 
> One thing I noticed about call-graph profiling:
> 
>   comet:~/tip/tools/perf> ./perf record -g ~/hackbench 10
>   callchain: Unknown -g option value: /home/mingo/hackbench
> 
> I think a naked -g used to work just fine in the past. Even if such an 
> error is displayed the output is very unhelpful, it does does a full 
> perf-record options dump (unnecessary), the bit that is helpful is hidden 
> amongst many other options:
> 
>     -g, --call-graph <mode[,dump_size]>
>                           do call-graph (stack chain/backtrace) recording: [fp] dwarf
> 
> and it took me two reads to see that I should specify 'fp'. The '[fp]' 
> indicates that fp is the default - but that does not appear to be working.

'-g' takes optional parameter, so having it in front of
non option string is causing the error, you could use:

./perf record -g -- ~/hackbench 10

maybe we could display just only help string of the option
we failed to process in this case

jirka

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-11  7:34   ` Jiri Olsa
@ 2013-10-11  8:24     ` Namhyung Kim
  2013-10-11 12:59       ` Ingo Molnar
  2013-10-11 15:11     ` David Ahern
  1 sibling, 1 reply; 91+ messages in thread
From: Namhyung Kim @ 2013-10-11  8:24 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Ingo Molnar, Arnaldo Carvalho de Melo, Peter Zijlstra,
	Paul Mackerras, Namhyung Kim, LKML, Linus Torvalds,
	Frederic Weisbecker

Hi Jiri,

On Fri, 11 Oct 2013 09:34:48 +0200, Jiri Olsa wrote:
> On Fri, Oct 11, 2013 at 07:58:29AM +0200, Ingo Molnar wrote:
>> 
>> * Namhyung Kim <namhyung@kernel.org> wrote:
>> 
>> > Hello,
>> > 
>> > This is a new version of callchain improvement patchset.  Basically
>> > it's almost same as v4 but rebased on current acme/perf/core and some
>> > functions are renamed as Frederic requested.
>> > 
>> > Now I'm hunting down a bug in 'perf report -s sym' which was found
>> > during the test, but I think it's not related to this change as it can
>> > be reproduced in earlier versions too.
>> > 
>> > I put this series on 'perf/callchain-v5' branch in my tree
>> > 
>> >   git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git
>> > 
>> > Any comments are welcome, thanks.
>> 
>> One thing I noticed about call-graph profiling:
>> 
>>   comet:~/tip/tools/perf> ./perf record -g ~/hackbench 10
>>   callchain: Unknown -g option value: /home/mingo/hackbench
>> 
>> I think a naked -g used to work just fine in the past. Even if such an 
>> error is displayed the output is very unhelpful, it does does a full 
>> perf-record options dump (unnecessary), the bit that is helpful is hidden 
>> amongst many other options:
>> 
>>     -g, --call-graph <mode[,dump_size]>
>>                           do call-graph (stack chain/backtrace) recording: [fp] dwarf
>> 
>> and it took me two reads to see that I should specify 'fp'. The '[fp]' 
>> indicates that fp is the default - but that does not appear to be working.
>
> '-g' takes optional parameter, so having it in front of
> non option string is causing the error, you could use:
>
> ./perf record -g -- ~/hackbench 10

Yes, I think this is an unfortunate change to break some user's
scripts.  It'd be great if it detect whether the next argument belongs
to the option, or if not, pass it to next normally - but it seems to be
not so simple IMHO.

>
> maybe we could display just only help string of the option
> we failed to process in this case

Right, I think it's more helpful in most cases.

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-11  8:24     ` Namhyung Kim
@ 2013-10-11 12:59       ` Ingo Molnar
  2013-10-11 13:04         ` Peter Zijlstra
  0 siblings, 1 reply; 91+ messages in thread
From: Ingo Molnar @ 2013-10-11 12:59 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Jiri Olsa, Arnaldo Carvalho de Melo, Peter Zijlstra,
	Paul Mackerras, Namhyung Kim, LKML, Linus Torvalds,
	Frederic Weisbecker


* Namhyung Kim <namhyung@kernel.org> wrote:

> Hi Jiri,
> 
> On Fri, 11 Oct 2013 09:34:48 +0200, Jiri Olsa wrote:
> > On Fri, Oct 11, 2013 at 07:58:29AM +0200, Ingo Molnar wrote:
> >> 
> >> * Namhyung Kim <namhyung@kernel.org> wrote:
> >> 
> >> > Hello,
> >> > 
> >> > This is a new version of callchain improvement patchset.  Basically
> >> > it's almost same as v4 but rebased on current acme/perf/core and some
> >> > functions are renamed as Frederic requested.
> >> > 
> >> > Now I'm hunting down a bug in 'perf report -s sym' which was found
> >> > during the test, but I think it's not related to this change as it can
> >> > be reproduced in earlier versions too.
> >> > 
> >> > I put this series on 'perf/callchain-v5' branch in my tree
> >> > 
> >> >   git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git
> >> > 
> >> > Any comments are welcome, thanks.
> >> 
> >> One thing I noticed about call-graph profiling:
> >> 
> >>   comet:~/tip/tools/perf> ./perf record -g ~/hackbench 10
> >>   callchain: Unknown -g option value: /home/mingo/hackbench
> >> 
> >> I think a naked -g used to work just fine in the past. Even if such an 
> >> error is displayed the output is very unhelpful, it does does a full 
> >> perf-record options dump (unnecessary), the bit that is helpful is hidden 
> >> amongst many other options:
> >> 
> >>     -g, --call-graph <mode[,dump_size]>
> >>                           do call-graph (stack chain/backtrace) recording: [fp] dwarf
> >> 
> >> and it took me two reads to see that I should specify 'fp'. The '[fp]' 
> >> indicates that fp is the default - but that does not appear to be working.
> >
> > '-g' takes optional parameter, so having it in front of
> > non option string is causing the error, you could use:
> >
> > ./perf record -g -- ~/hackbench 10
> 
> Yes, I think this is an unfortunate change to break some user's scripts.  
> It'd be great if it detect whether the next argument belongs to the 
> option, or if not, pass it to next normally - but it seems to be not so 
> simple IMHO.

So, why not keep -g as a shortcut to whatever default call-graph profiling 
we want to provide (note, this does not mean it always has to be 'fp'), 
and use --call-graph for more specific variants?

a .perfconfig value could even set the default for '-g', so that you don't 
have to type '--call-graph dwarf' all the time.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-11 12:59       ` Ingo Molnar
@ 2013-10-11 13:04         ` Peter Zijlstra
  0 siblings, 0 replies; 91+ messages in thread
From: Peter Zijlstra @ 2013-10-11 13:04 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Namhyung Kim, Jiri Olsa, Arnaldo Carvalho de Melo, Paul Mackerras,
	Namhyung Kim, LKML, Linus Torvalds, Frederic Weisbecker

On Fri, Oct 11, 2013 at 02:59:35PM +0200, Ingo Molnar wrote:
> So, why not keep -g as a shortcut to whatever default call-graph profiling 
> we want to provide (note, this does not mean it always has to be 'fp'), 
> and use --call-graph for more specific variants?
> 
> a .perfconfig value could even set the default for '-g', so that you don't 
> have to type '--call-graph dwarf' all the time.


Another, slightly related annoyance I have is with -B, we default to
--big-num so -B is utterly useless, and the only remaining option is
--no-big-num.


^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-11  7:34   ` Jiri Olsa
  2013-10-11  8:24     ` Namhyung Kim
@ 2013-10-11 15:11     ` David Ahern
  2013-10-11 15:20       ` David Ahern
  1 sibling, 1 reply; 91+ messages in thread
From: David Ahern @ 2013-10-11 15:11 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Ingo Molnar, Namhyung Kim, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Paul Mackerras, Namhyung Kim, LKML,
	Linus Torvalds, Frederic Weisbecker

On 10/11/13 1:34 AM, Jiri Olsa wrote:
> '-g' takes optional parameter, so having it in front of
> non option string is causing the error, you could use:
>
> ./perf record -g -- ~/hackbench 10
>
> maybe we could display just only help string of the option
> we failed to process in this case

That's a syntax change introduced I believe when you added dwarf 
support. Every now and then I slip up and type -ga -- something that 
worked up until the callchain arg was expanded -- and now it fails as -g 
wants to look at 'a' as an argument to it.

It would be nice to fix the callchain arg handler to not attempt to 
process the next argument if it is not fp or dwarf.

David


^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-11 15:11     ` David Ahern
@ 2013-10-11 15:20       ` David Ahern
  2013-10-11 21:51         ` Andi Kleen
  2013-10-12 16:53         ` Ingo Molnar
  0 siblings, 2 replies; 91+ messages in thread
From: David Ahern @ 2013-10-11 15:20 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Ingo Molnar, Namhyung Kim, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Paul Mackerras, Namhyung Kim, LKML,
	Linus Torvalds, Frederic Weisbecker

On 10/11/13 9:11 AM, David Ahern wrote:
> It would be nice to fix the callchain arg handler to not attempt to
> process the next argument if it is not fp or dwarf.

Specifically, something like this which maintains syntax and default fp 
option:

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 92ca541..23d782c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -766,8 +766,8 @@ int record_parse_callchain_opt(const struct option *opt,
                      opts->stack_dump_size);
  #endif /* HAVE_LIBUNWIND_SUPPORT */
         } else {
-           pr_err("callchain: Unknown -g option "
-                  "value: %s\n", arg);
+           opts->call_graph = CALLCHAIN_FP;
+           ret = 0;
             break;
         }



^ permalink raw reply related	[flat|nested] 91+ messages in thread

* Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-11 15:20       ` David Ahern
@ 2013-10-11 21:51         ` Andi Kleen
  2013-10-11 22:04           ` David Ahern
  2013-10-12 16:53         ` Ingo Molnar
  1 sibling, 1 reply; 91+ messages in thread
From: Andi Kleen @ 2013-10-11 21:51 UTC (permalink / raw)
  To: David Ahern
  Cc: Jiri Olsa, Ingo Molnar, Namhyung Kim, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Paul Mackerras, Namhyung Kim, LKML,
	Linus Torvalds, Frederic Weisbecker

David Ahern <dsahern@gmail.com> writes:

> On 10/11/13 9:11 AM, David Ahern wrote:
>> It would be nice to fix the callchain arg handler to not attempt to
>> process the next argument if it is not fp or dwarf.
>
> Specifically, something like this which maintains syntax and default
> fp option:

Yes please! This happens to me all the time too

(usually I train myself to use -g --, but i still sometimes forget)

It still wouldn't handle -ga, but naked -g seems to be the common case.

-Andi

>
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 92ca541..23d782c 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -766,8 +766,8 @@ int record_parse_callchain_opt(const struct option *opt,
>                      opts->stack_dump_size);
>  #endif /* HAVE_LIBUNWIND_SUPPORT */
>         } else {
> -           pr_err("callchain: Unknown -g option "
> -                  "value: %s\n", arg);
> +           opts->call_graph = CALLCHAIN_FP;
> +           ret = 0;
>             break;
>         }
>
>

-- 
ak@linux.intel.com -- Speaking for myself only

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-11 21:51         ` Andi Kleen
@ 2013-10-11 22:04           ` David Ahern
  2013-10-13 10:25             ` Jiri Olsa
  0 siblings, 1 reply; 91+ messages in thread
From: David Ahern @ 2013-10-11 22:04 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Jiri Olsa, Ingo Molnar, Namhyung Kim, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Paul Mackerras, Namhyung Kim, LKML,
	Linus Torvalds, Frederic Weisbecker

On 10/11/13 3:51 PM, Andi Kleen wrote:
> David Ahern <dsahern@gmail.com> writes:
>
>> On 10/11/13 9:11 AM, David Ahern wrote:
>>> It would be nice to fix the callchain arg handler to not attempt to
>>> process the next argument if it is not fp or dwarf.
>>
>> Specifically, something like this which maintains syntax and default
>> fp option:
>
> Yes please! This happens to me all the time too
>
> (usually I train myself to use -g --, but i still sometimes forget)
>
> It still wouldn't handle -ga, but naked -g seems to be the common case.

grrr... you're right. I ran right through that. With -ga the 'a' gets lost.

This seems to do the trick:

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 92ca541..726d2c2 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -721,7 +721,11 @@ int record_parse_callchain_opt(const struct option 
*opt,
         return 0;

     /* We specified default option if none is provided. */
-   BUG_ON(!arg);
+   if (!arg) {
+       opts->call_graph = CALLCHAIN_FP;
+       return 0;
+   }
+

     /* We need buffer that we know we can write to. */
     buf = malloc(strlen(arg) + 1);
@@ -766,8 +770,8 @@ int record_parse_callchain_opt(const struct option *opt,
                      opts->stack_dump_size);
  #endif /* HAVE_LIBUNWIND_SUPPORT */
         } else {
-           pr_err("callchain: Unknown -g option "
-                  "value: %s\n", arg);
+           opts->call_graph = CALLCHAIN_FP;
+           ret = 0;
             break;
         }

@@ -855,7 +859,7 @@ const struct option record_options[] = {
              perf_evlist__parse_mmap_pages),
     OPT_BOOLEAN(0, "group", &record.opts.group,
             "put the counters into a counter group"),
-   OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
+   OPT_CALLBACK_DEFAULT_NOOPT('g', "call-graph", &record.opts,
                  "mode[,dump_size]", record_callchain_help,
                  &record_parse_callchain_opt, "fp"),
     OPT_INCR('v', "verbose", &verbose,

^ permalink raw reply related	[flat|nested] 91+ messages in thread

* Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-11 15:20       ` David Ahern
  2013-10-11 21:51         ` Andi Kleen
@ 2013-10-12 16:53         ` Ingo Molnar
  2013-10-12 19:42           ` David Ahern
  1 sibling, 1 reply; 91+ messages in thread
From: Ingo Molnar @ 2013-10-12 16:53 UTC (permalink / raw)
  To: David Ahern
  Cc: Jiri Olsa, Namhyung Kim, Arnaldo Carvalho de Melo, Peter Zijlstra,
	Paul Mackerras, Namhyung Kim, LKML, Linus Torvalds,
	Frederic Weisbecker


* David Ahern <dsahern@gmail.com> wrote:

> On 10/11/13 9:11 AM, David Ahern wrote:
> >It would be nice to fix the callchain arg handler to not attempt to
> >process the next argument if it is not fp or dwarf.
> 
> Specifically, something like this which maintains syntax and default
> fp option:

Lets not maintain a broken syntax please - breaking the very simple -ga 
was the primary regression to begin with, so lets fix that.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-12 16:53         ` Ingo Molnar
@ 2013-10-12 19:42           ` David Ahern
  2013-10-13  5:23             ` Ingo Molnar
  0 siblings, 1 reply; 91+ messages in thread
From: David Ahern @ 2013-10-12 19:42 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Jiri Olsa, Namhyung Kim, Arnaldo Carvalho de Melo, Peter Zijlstra,
	Paul Mackerras, Namhyung Kim, LKML, Linus Torvalds,
	Frederic Weisbecker

On 10/12/13 10:53 AM, Ingo Molnar wrote:
>
> * David Ahern <dsahern@gmail.com> wrote:
>
>> On 10/11/13 9:11 AM, David Ahern wrote:
>>> It would be nice to fix the callchain arg handler to not attempt to
>>> process the next argument if it is not fp or dwarf.
>>
>> Specifically, something like this which maintains syntax and default
>> fp option:
>
> Lets not maintain a broken syntax please - breaking the very simple -ga
> was the primary regression to begin with, so lets fix that.

I think I did with the second follow up patch: -ga -ag -g fp -g dwarf 
should all work properly with fp the default for -g.

David


^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-12 19:42           ` David Ahern
@ 2013-10-13  5:23             ` Ingo Molnar
  2013-10-25 19:09               ` RFP: Fixing "-ga -ag -g fp -g dwarf" was " Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 91+ messages in thread
From: Ingo Molnar @ 2013-10-13  5:23 UTC (permalink / raw)
  To: David Ahern
  Cc: Jiri Olsa, Namhyung Kim, Arnaldo Carvalho de Melo, Peter Zijlstra,
	Paul Mackerras, Namhyung Kim, LKML, Linus Torvalds,
	Frederic Weisbecker


* David Ahern <dsahern@gmail.com> wrote:

> On 10/12/13 10:53 AM, Ingo Molnar wrote:
> >
> >* David Ahern <dsahern@gmail.com> wrote:
> >
> >>On 10/11/13 9:11 AM, David Ahern wrote:
> >>>It would be nice to fix the callchain arg handler to not attempt to
> >>>process the next argument if it is not fp or dwarf.
> >>
> >>Specifically, something like this which maintains syntax and default
> >>fp option:
> >
> >Lets not maintain a broken syntax please - breaking the very simple -ga
> >was the primary regression to begin with, so lets fix that.
> 
> I think I did with the second follow up patch: -ga -ag -g fp -g
> dwarf should all work properly with fp the default for -g.

Acked-by: Ingo Molnar <mingo@kernel.org>

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-11 22:04           ` David Ahern
@ 2013-10-13 10:25             ` Jiri Olsa
  2013-10-13 21:18               ` [RFC] perf record,top: Add callchain option into .perfconfig Jiri Olsa
  2013-10-14  7:56               ` [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Ingo Molnar
  0 siblings, 2 replies; 91+ messages in thread
From: Jiri Olsa @ 2013-10-13 10:25 UTC (permalink / raw)
  To: David Ahern
  Cc: Andi Kleen, Ingo Molnar, Namhyung Kim, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Paul Mackerras, Namhyung Kim, LKML,
	Linus Torvalds, Frederic Weisbecker

On Fri, Oct 11, 2013 at 04:04:43PM -0600, David Ahern wrote:
> On 10/11/13 3:51 PM, Andi Kleen wrote:
> >David Ahern <dsahern@gmail.com> writes:
> >
> >>On 10/11/13 9:11 AM, David Ahern wrote:
> >>>It would be nice to fix the callchain arg handler to not attempt to
> >>>process the next argument if it is not fp or dwarf.
> >>
> >>Specifically, something like this which maintains syntax and default
> >>fp option:
> >
> >Yes please! This happens to me all the time too
> >
> >(usually I train myself to use -g --, but i still sometimes forget)
> >
> >It still wouldn't handle -ga, but naked -g seems to be the common case.
> 
> grrr... you're right. I ran right through that. With -ga the 'a' gets lost.
> 
> This seems to do the trick:
> 
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 92ca541..726d2c2 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -721,7 +721,11 @@ int record_parse_callchain_opt(const struct
> option *opt,
>         return 0;
> 
>     /* We specified default option if none is provided. */
> -   BUG_ON(!arg);
> +   if (!arg) {
> +       opts->call_graph = CALLCHAIN_FP;
> +       return 0;
> +   }
> +
> 
>     /* We need buffer that we know we can write to. */
>     buf = malloc(strlen(arg) + 1);
> @@ -766,8 +770,8 @@ int record_parse_callchain_opt(const struct option *opt,
>                      opts->stack_dump_size);
>  #endif /* HAVE_LIBUNWIND_SUPPORT */
>         } else {
> -           pr_err("callchain: Unknown -g option "
> -                  "value: %s\n", arg);
> +           opts->call_graph = CALLCHAIN_FP;
> +           ret = 0;
>             break;
>         }
> 
> @@ -855,7 +859,7 @@ const struct option record_options[] = {
>              perf_evlist__parse_mmap_pages),
>     OPT_BOOLEAN(0, "group", &record.opts.group,
>             "put the counters into a counter group"),
> -   OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
> +   OPT_CALLBACK_DEFAULT_NOOPT('g', "call-graph", &record.opts,

hum, this disables the option completely, no?

The issue is a consequence of allowing '-g' to have carry
a value (dwarf,fp). Maybe we could have some sort of new
OPT_OPTION type, where if its value is not recognized it
would be passed to next option.

Or the way Ingo suggested earlier:

---
So, why not keep -g as a shortcut to whatever default call-graph profiling
we want to provide (note, this does not mean it always has to be 'fp'),
and use --call-graph for more specific variants?

a .perfconfig value could even set the default for '-g', so that you don't
have to type '--call-graph dwarf' all the time.
---

I'll try to come up with something later today

jirka

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-11  5:15 [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Namhyung Kim
                   ` (8 preceding siblings ...)
  2013-10-11  5:58 ` [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Ingo Molnar
@ 2013-10-13 12:34 ` Jiri Olsa
  2013-10-14  1:06   ` Namhyung Kim
  2013-10-14  4:50   ` Namhyung Kim
  9 siblings, 2 replies; 91+ messages in thread
From: Jiri Olsa @ 2013-10-13 12:34 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds,
	Frederic Weisbecker

On Fri, Oct 11, 2013 at 02:15:35PM +0900, Namhyung Kim wrote:
> Hello,
> 
> This is a new version of callchain improvement patchset.  Basically
> it's almost same as v4 but rebased on current acme/perf/core and some
> functions are renamed as Frederic requested.
> 
> Now I'm hunting down a bug in 'perf report -s sym' which was found
> during the test, but I think it's not related to this change as it can
> be reproduced in earlier versions too.
> 
> I put this series on 'perf/callchain-v5' branch in my tree
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git
hi,
while hunting another issue, I got segfault in callchains code:

Press '?' for help on key bindings
Program received signal SIGSEGV, Segmentation fault.
0x00000000004dcd89 in callchain_node__init_have_children_rb_tree (node=0x21a0330) at ui/browsers/hists.c:158
158                                     chain->ms.has_children = chain->list.next == &child->val &&
Missing separate debuginfos, use: debuginfo-install audit-libs-2.2.1-1.fc16.x86_64 elfutils-libelf-0.154-2.fc16.x86_64 elfutils-libs-0.154-2.fc16.x86_64 glibc-2.14.90-24.fc16.9.x86_64 libunwind-0.99-2.20110424git1e10c293.fc16.x86_64 nss-softokn-freebl-3.14.1-3.fc16.x86_64 numactl-2.0.7-2.fc16.x86_64 perl-libs-5.14.3-205.fc16.x86_64 python-libs-2.7.3-4.fc16.x86_64 slang-2.2.4-1.fc16.x86_64 xz-libs-5.1.1-1alpha.fc16.x86_64 zlib-1.2.5-7.fc16.x86_64
(gdb) bt
#0  0x00000000004dcd89 in callchain_node__init_have_children_rb_tree (node=0x21a0330) at ui/browsers/hists.c:158
#1  0x00000000004dcdf9 in callchain_node__init_have_children_rb_tree (node=0x1fa90a0) at ui/browsers/hists.c:162
#2  0x00000000004dcead in callchain_node__init_have_children (node=0x1fa90a0) at ui/browsers/hists.c:173
#3  0x00000000004dcf10 in callchain__init_have_children (root=0x1fa8ff8) at ui/browsers/hists.c:182
#4  0x00000000004dcf97 in hist_entry__init_have_children (he=0x1fa8f00) at ui/browsers/hists.c:190
#5  0x00000000004debf3 in hist_browser__show_entry (browser=0xa27010, entry=0x1fa8f00, row=66) at ui/browsers/hists.c:739
#6  0x00000000004df062 in hist_browser__refresh (browser=0xa27010) at ui/browsers/hists.c:823
#7  0x00000000004d8487 in __ui_browser__refresh (browser=0xa27010) at ui/browser.c:307
#8  0x00000000004d8681 in ui_browser__run (browser=0xa27010, delay_secs=0) at ui/browser.c:362
#9  0x00000000004dd6a7 in hist_browser__run (browser=0xa27010, ev_name=0xa26ff0 "cycles:pp", hbt=0x0) at ui/browsers/hists.c:335
#10 0x00000000004e0d9b in perf_evsel__hists_browse (evsel=0xa24b60, nr_events=1, 
    helpline=0x58e588 "For a higher level overview, try: perf report --sort comm,dso", ev_name=0xa26ff0 "cycles:pp", left_exits=false, 
    hbt=0x0, min_pcnt=0, env=0xa23f90) at ui/browsers/hists.c:1430
#11 0x00000000004e2605 in perf_evlist__tui_browse_hists (evlist=0xa242e0, 
    help=0x58e588 "For a higher level overview, try: perf report --sort comm,dso", hbt=0x0, min_pcnt=0, env=0xa23f90)
    at ui/browsers/hists.c:1950
#12 0x000000000042c3cb in __cmd_report (rep=0x7fffffffdf40) at builtin-report.c:590
#13 0x000000000042d84a in cmd_report (argc=0, argv=0x7fffffffe3a0, prefix=0x0) at builtin-report.c:988
#14 0x0000000000418fed in run_builtin (p=0x803c28, argc=5, argv=0x7fffffffe3a0) at perf.c:319
#15 0x0000000000419225 in handle_internal_command (argc=5, argv=0x7fffffffe3a0) at perf.c:376
#16 0x0000000000419371 in run_argv (argcp=0x7fffffffe27c, argv=0x7fffffffe270) at perf.c:420
#17 0x0000000000419658 in main (argc=5, argv=0x7fffffffe3a0) at perf.c:529


I got it on the original code, so I've tried your's
perf/callchain-v5 with same result.

I put perf archive and data output in here if you're interested:
http://people.redhat.com/jolsa/cc/

thanks,
jirka

^ permalink raw reply	[flat|nested] 91+ messages in thread

* [RFC] perf record,top: Add callchain option into .perfconfig
  2013-10-13 10:25             ` Jiri Olsa
@ 2013-10-13 21:18               ` Jiri Olsa
  2013-10-13 21:32                 ` Andi Kleen
  2013-10-14  7:56               ` [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Ingo Molnar
  1 sibling, 1 reply; 91+ messages in thread
From: Jiri Olsa @ 2013-10-13 21:18 UTC (permalink / raw)
  To: David Ahern
  Cc: Andi Kleen, Ingo Molnar, Namhyung Kim, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Paul Mackerras, Namhyung Kim, LKML,
	Linus Torvalds, Frederic Weisbecker

On Sun, Oct 13, 2013 at 12:25:21PM +0200, Jiri Olsa wrote:

SNIP

> 
> I'll try to come up with something later today
> 
> jirka


hi,
here it is.. not fully tested, no doc updates, dont want
to go too far before we agreed on this ;-)

thanks for comments,
jirka

--
The callchain option is now used only to enable
callchains. By default it's framepointer type.

If dwarf unwind is needed, following option needs
to be added into .perfconfig:

for top command:
  [top]
        call-graph = dwarf,8192

for record command:
  [record]
        call-graph = dwarf,8192

running perf with above config like this:
perf record -g ls
perf top -G

will enable dwarf unwind. The config file option
by itself does not enable the callchain, the -g/-G
option does.

TODO: doc/help needs to be updated

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
---
 tools/perf/builtin-record.c | 58 ++++++++++++++++++++++++++++++---------------
 tools/perf/builtin-top.c    | 23 +++++++++++-------
 tools/perf/perf.h           |  1 +
 tools/perf/util/callchain.h |  4 +++-
 tools/perf/util/evsel.c     |  2 +-
 5 files changed, 58 insertions(+), 30 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 92ca541..9b245d3 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -709,19 +709,35 @@ static int get_stack_size(char *str, unsigned long *_size)
 #endif /* HAVE_LIBUNWIND_SUPPORT */
 
 int record_parse_callchain_opt(const struct option *opt,
-			       const char *arg, int unset)
+			       const char *arg __maybe_unused,
+			       int unset)
 {
 	struct perf_record_opts *opts = opt->value;
-	char *tok, *name, *saveptr = NULL;
-	char *buf;
-	int ret = -1;
+
+	opts->call_graph_enabled = !unset;
 
 	/* --no-call-graph */
-	if (unset)
+	if (unset) {
+		opts->call_graph = CALLCHAIN_NONE;
 		return 0;
+	}
+
+	if (opts->call_graph == CALLCHAIN_NONE)
+		opts->call_graph = CALLCHAIN_FP;
+
+	pr_debug("callchain: type %d\n", opts->call_graph);
+
+	if (opts->call_graph == CALLCHAIN_DWARF)
+		pr_debug("callchain: stack dump size %d\n",
+			 opts->stack_dump_size);
+	return 0;
+}
 
-	/* We specified default option if none is provided. */
-	BUG_ON(!arg);
+int record_parse_callchain(const char *arg, struct perf_record_opts *opts)
+{
+	char *tok, *name, *saveptr = NULL;
+	char *buf;
+	int ret = -1;
 
 	/* We need buffer that we know we can write to. */
 	buf = malloc(strlen(arg) + 1);
@@ -740,7 +756,7 @@ int record_parse_callchain_opt(const struct option *opt,
 				opts->call_graph = CALLCHAIN_FP;
 				ret = 0;
 			} else
-				pr_err("callchain: No more arguments "
+				fprintf(stderr, "callchain: No more arguments "
 				       "needed for -g fp\n");
 			break;
 
@@ -760,13 +776,9 @@ int record_parse_callchain_opt(const struct option *opt,
 				ret = get_stack_size(tok, &size);
 				opts->stack_dump_size = size;
 			}
-
-			if (!ret)
-				pr_debug("callchain: stack dump size %d\n",
-					 opts->stack_dump_size);
 #endif /* HAVE_LIBUNWIND_SUPPORT */
 		} else {
-			pr_err("callchain: Unknown -g option "
+			fprintf(stderr, "callchain: Unknown -g option "
 			       "value: %s\n", arg);
 			break;
 		}
@@ -774,11 +786,17 @@ int record_parse_callchain_opt(const struct option *opt,
 	} while (0);
 
 	free(buf);
+	return ret;
+}
 
-	if (!ret)
-		pr_debug("callchain: type %d\n", opts->call_graph);
+static int perf_record_config(const char *var, const char *value, void *cb)
+{
+	struct perf_record *rec = cb;
 
-	return ret;
+	if (!strcmp(var, "record.call-graph"))
+		return record_parse_callchain(value, &rec->opts);
+
+	return perf_default_config(var, value, cb);
 }
 
 static const char * const record_usage[] = {
@@ -855,9 +873,9 @@ const struct option record_options[] = {
 		     perf_evlist__parse_mmap_pages),
 	OPT_BOOLEAN(0, "group", &record.opts.group,
 		    "put the counters into a counter group"),
-	OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
-			     "mode[,dump_size]", record_callchain_help,
-			     &record_parse_callchain_opt, "fp"),
+	OPT_CALLBACK_NOOPT('g', "call-graph", &record.opts,
+			   "mode[,dump_size]", record_callchain_help,
+			   &record_parse_callchain_opt),
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show counter open errors, etc)"),
 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
@@ -906,6 +924,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 
 	rec->evlist = evsel_list;
 
+	perf_config(perf_record_config, rec);
+
 	argc = parse_options(argc, argv, record_options, record_usage,
 			    PARSE_OPT_STOP_AT_NON_OPTION);
 	if (!argc && perf_target__none(&rec->opts.target))
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 65c49b2..b8e1b96 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1017,17 +1017,20 @@ out_delete:
 static int
 parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 {
-	/*
-	 * --no-call-graph
-	 */
-	if (unset)
-		return 0;
-
 	symbol_conf.use_callchain = true;
-
 	return record_parse_callchain_opt(opt, arg, unset);
 }
 
+static int perf_top_config(const char *var, const char *value, void *cb)
+{
+	struct perf_top *top = cb;
+
+	if (!strcmp(var, "top.call-graph"))
+		return record_parse_callchain(value, &top->record_opts);
+
+	return perf_default_config(var, value, cb);
+}
+
 static int
 parse_percent_limit(const struct option *opt, const char *arg,
 		    int unset __maybe_unused)
@@ -1109,9 +1112,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 		   " abort, in_tx, transaction"),
 	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
 		    "Show a column with the number of samples"),
-	OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
+	OPT_CALLBACK_NOOPT('G', "call-graph", &top.record_opts,
 			     "mode[,dump_size]", record_callchain_help,
-			     &parse_callchain_opt, "fp"),
+			     &parse_callchain_opt),
 	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
 		   "ignore callees of these functions in call graphs",
 		   report_parse_ignore_callees_opt),
@@ -1145,6 +1148,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (top.evlist == NULL)
 		return -ENOMEM;
 
+	perf_config(perf_top_config, &top);
+
 	argc = parse_options(argc, argv, options, top_usage, 0);
 	if (argc)
 		usage_with_options(top_usage, options);
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 84502e8..fed8903 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -215,6 +215,7 @@ enum perf_call_graph_mode {
 struct perf_record_opts {
 	struct perf_target target;
 	int	     call_graph;
+	bool         call_graph_enabled;
 	bool	     group;
 	bool	     inherit_stat;
 	bool	     no_delay;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 2b585bc..b3e796b 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -147,6 +147,8 @@ static inline void callchain_cursor_advance(struct callchain_cursor *cursor)
 
 struct option;
 
-int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset);
+int record_parse_callchain_opt(const struct option *opt, const char *arg,
+			       int unset);
+int record_parse_callchain(const char *arg, struct perf_record_opts *rec);
 extern const char record_callchain_help[];
 #endif	/* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index bfebc1e..8f3dd33 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -633,7 +633,7 @@ void perf_evsel__config(struct perf_evsel *evsel,
 		attr->mmap_data = track;
 	}
 
-	if (opts->call_graph) {
+	if (opts->call_graph_enabled) {
 		perf_evsel__set_sample_bit(evsel, CALLCHAIN);
 
 		if (opts->call_graph == CALLCHAIN_DWARF) {
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 91+ messages in thread

* Re: [RFC] perf record,top: Add callchain option into .perfconfig
  2013-10-13 21:18               ` [RFC] perf record,top: Add callchain option into .perfconfig Jiri Olsa
@ 2013-10-13 21:32                 ` Andi Kleen
  0 siblings, 0 replies; 91+ messages in thread
From: Andi Kleen @ 2013-10-13 21:32 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: David Ahern, Andi Kleen, Ingo Molnar, Namhyung Kim,
	Arnaldo Carvalho de Melo, Peter Zijlstra, Paul Mackerras,
	Namhyung Kim, LKML, Linus Torvalds, Frederic Weisbecker

On Sun, Oct 13, 2013 at 11:18:05PM +0200, Jiri Olsa wrote:
> On Sun, Oct 13, 2013 at 12:25:21PM +0200, Jiri Olsa wrote:
> 
> SNIP
> 
> > 
> > I'll try to come up with something later today
> > 
> > jirka
> 
> 
> hi,
> here it is.. not fully tested, no doc updates, dont want
> to go too far before we agreed on this ;-)

I don't think that's a good idea. Still need some way to control
dwarf from the command line.

-Andi

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-13 12:34 ` [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Jiri Olsa
@ 2013-10-14  1:06   ` Namhyung Kim
  2013-10-14  4:50   ` Namhyung Kim
  1 sibling, 0 replies; 91+ messages in thread
From: Namhyung Kim @ 2013-10-14  1:06 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Arnaldo Carvalho de Melo, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds,
	Frederic Weisbecker

Hi Jiri,

On Sun, 13 Oct 2013 14:34:44 +0200, Jiri Olsa wrote:
> On Fri, Oct 11, 2013 at 02:15:35PM +0900, Namhyung Kim wrote:
>> Hello,
>> 
>> This is a new version of callchain improvement patchset.  Basically
>> it's almost same as v4 but rebased on current acme/perf/core and some
>> functions are renamed as Frederic requested.
>> 
>> Now I'm hunting down a bug in 'perf report -s sym' which was found
>> during the test, but I think it's not related to this change as it can
>> be reproduced in earlier versions too.
>> 
>> I put this series on 'perf/callchain-v5' branch in my tree
>> 
>>   git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git
> hi,
> while hunting another issue, I got segfault in callchains code:
>
> Press '?' for help on key bindings
> Program received signal SIGSEGV, Segmentation fault.
> 0x00000000004dcd89 in callchain_node__init_have_children_rb_tree (node=0x21a0330) at ui/browsers/hists.c:158
> 158                                     chain->ms.has_children = chain->list.next == &child->val &&
> Missing separate debuginfos, use: debuginfo-install
> audit-libs-2.2.1-1.fc16.x86_64 elfutils-libelf-0.154-2.fc16.x86_64
> elfutils-libs-0.154-2.fc16.x86_64 glibc-2.14.90-24.fc16.9.x86_64
> libunwind-0.99-2.20110424git1e10c293.fc16.x86_64
> nss-softokn-freebl-3.14.1-3.fc16.x86_64 numactl-2.0.7-2.fc16.x86_64
> perl-libs-5.14.3-205.fc16.x86_64 python-libs-2.7.3-4.fc16.x86_64
> slang-2.2.4-1.fc16.x86_64 xz-libs-5.1.1-1alpha.fc16.x86_64
> zlib-1.2.5-7.fc16.x86_64
> (gdb) bt
> #0  0x00000000004dcd89 in callchain_node__init_have_children_rb_tree (node=0x21a0330) at ui/browsers/hists.c:158
> #1  0x00000000004dcdf9 in callchain_node__init_have_children_rb_tree (node=0x1fa90a0) at ui/browsers/hists.c:162
> #2  0x00000000004dcead in callchain_node__init_have_children (node=0x1fa90a0) at ui/browsers/hists.c:173
> #3  0x00000000004dcf10 in callchain__init_have_children (root=0x1fa8ff8) at ui/browsers/hists.c:182
> #4  0x00000000004dcf97 in hist_entry__init_have_children (he=0x1fa8f00) at ui/browsers/hists.c:190
> #5  0x00000000004debf3 in hist_browser__show_entry (browser=0xa27010, entry=0x1fa8f00, row=66) at ui/browsers/hists.c:739
> #6  0x00000000004df062 in hist_browser__refresh (browser=0xa27010) at ui/browsers/hists.c:823
> #7  0x00000000004d8487 in __ui_browser__refresh (browser=0xa27010) at ui/browser.c:307
> #8  0x00000000004d8681 in ui_browser__run (browser=0xa27010, delay_secs=0) at ui/browser.c:362
> #9  0x00000000004dd6a7 in hist_browser__run (browser=0xa27010, ev_name=0xa26ff0 "cycles:pp", hbt=0x0) at ui/browsers/hists.c:335
> #10 0x00000000004e0d9b in perf_evsel__hists_browse (evsel=0xa24b60, nr_events=1, 
>     helpline=0x58e588 "For a higher level overview, try: perf report --sort comm,dso", ev_name=0xa26ff0 "cycles:pp", left_exits=false, 
>     hbt=0x0, min_pcnt=0, env=0xa23f90) at ui/browsers/hists.c:1430
> #11 0x00000000004e2605 in perf_evlist__tui_browse_hists (evlist=0xa242e0, 
>     help=0x58e588 "For a higher level overview, try: perf report --sort comm,dso", hbt=0x0, min_pcnt=0, env=0xa23f90)
>     at ui/browsers/hists.c:1950
> #12 0x000000000042c3cb in __cmd_report (rep=0x7fffffffdf40) at builtin-report.c:590
> #13 0x000000000042d84a in cmd_report (argc=0, argv=0x7fffffffe3a0, prefix=0x0) at builtin-report.c:988
> #14 0x0000000000418fed in run_builtin (p=0x803c28, argc=5, argv=0x7fffffffe3a0) at perf.c:319
> #15 0x0000000000419225 in handle_internal_command (argc=5, argv=0x7fffffffe3a0) at perf.c:376
> #16 0x0000000000419371 in run_argv (argcp=0x7fffffffe27c, argv=0x7fffffffe270) at perf.c:420
> #17 0x0000000000419658 in main (argc=5, argv=0x7fffffffe3a0) at perf.c:529
>
>
> I got it on the original code, so I've tried your's
> perf/callchain-v5 with same result.
>
> I put perf archive and data output in here if you're interested:
> http://people.redhat.com/jolsa/cc/

Thanks, I'll take a look at this today.
Namhyung

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-13 12:34 ` [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Jiri Olsa
  2013-10-14  1:06   ` Namhyung Kim
@ 2013-10-14  4:50   ` Namhyung Kim
  2013-10-14  8:01     ` Jiri Olsa
  1 sibling, 1 reply; 91+ messages in thread
From: Namhyung Kim @ 2013-10-14  4:50 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Arnaldo Carvalho de Melo, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds,
	Frederic Weisbecker

On Sun, 13 Oct 2013 14:34:44 +0200, Jiri Olsa wrote:
> I put perf archive and data output in here if you're interested:
> http://people.redhat.com/jolsa/cc/

I can't download the data output (but the archive is fine).

  $ curl http://people.redhat.com/jolsa/cc/perf.data
  <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
  <html><head>
  <title>403 Forbidden</title>
  </head><body>
  <h1>Forbidden</h1>
  <p>You don't have permission to access /jolsa/cc/perf.data
  on this server.</p>
  <hr>
  <address>Apache Server at people.redhat.com Port 80</address>
  </body></html>


Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-13 10:25             ` Jiri Olsa
  2013-10-13 21:18               ` [RFC] perf record,top: Add callchain option into .perfconfig Jiri Olsa
@ 2013-10-14  7:56               ` Ingo Molnar
  1 sibling, 0 replies; 91+ messages in thread
From: Ingo Molnar @ 2013-10-14  7:56 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: David Ahern, Andi Kleen, Namhyung Kim, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Paul Mackerras, Namhyung Kim, LKML,
	Linus Torvalds, Frederic Weisbecker


* Jiri Olsa <jolsa@redhat.com> wrote:

> > -   OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
> > +   OPT_CALLBACK_DEFAULT_NOOPT('g', "call-graph", &record.opts,
> 
> hum, this disables the option completely, no?
> 
> The issue is a consequence of allowing '-g' to have carry
> a value (dwarf,fp). Maybe we could have some sort of new
> OPT_OPTION type, where if its value is not recognized it
> would be passed to next option.
> 
> Or the way Ingo suggested earlier:
> 
> ---
> So, why not keep -g as a shortcut to whatever default call-graph profiling
> we want to provide (note, this does not mean it always has to be 'fp'),
> and use --call-graph for more specific variants?
> 
> a .perfconfig value could even set the default for '-g', so that you don't
> have to type '--call-graph dwarf' all the time.
> ---
> 
> I'll try to come up with something later today

So, I think we should split -g/--call-graph into _two_, separate options:

	--call-graph <variant>
	-g

instead of turning the '<variant>' configurability off. Whatever is 
configurable via .perfconfig ought to also have a command line 
counterpart. This is pretty fundamental.

Ack on the .perfconfig aspect to choose a default for -g.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-14  4:50   ` Namhyung Kim
@ 2013-10-14  8:01     ` Jiri Olsa
  2013-10-14  8:41       ` Namhyung Kim
  0 siblings, 1 reply; 91+ messages in thread
From: Jiri Olsa @ 2013-10-14  8:01 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds,
	Frederic Weisbecker

On Mon, Oct 14, 2013 at 01:50:09PM +0900, Namhyung Kim wrote:
> On Sun, 13 Oct 2013 14:34:44 +0200, Jiri Olsa wrote:
> > I put perf archive and data output in here if you're interested:
> > http://people.redhat.com/jolsa/cc/
> 
> I can't download the data output (but the archive is fine).
> 
>   $ curl http://people.redhat.com/jolsa/cc/perf.data
>   <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
>   <html><head>
>   <title>403 Forbidden</title>
>   </head><body>
>   <h1>Forbidden</h1>
>   <p>You don't have permission to access /jolsa/cc/perf.data
>   on this server.</p>
>   <hr>
>   <address>Apache Server at people.redhat.com Port 80</address>
>   </body></html>

oops, should be fixed now.. haven't checked persmissions :-\

jirka

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-14  8:01     ` Jiri Olsa
@ 2013-10-14  8:41       ` Namhyung Kim
  0 siblings, 0 replies; 91+ messages in thread
From: Namhyung Kim @ 2013-10-14  8:41 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Arnaldo Carvalho de Melo, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds,
	Frederic Weisbecker

Hi Jiri,

On Mon, 14 Oct 2013 10:01:08 +0200, Jiri Olsa wrote:
> On Mon, Oct 14, 2013 at 01:50:09PM +0900, Namhyung Kim wrote:
>> On Sun, 13 Oct 2013 14:34:44 +0200, Jiri Olsa wrote:
>> > I put perf archive and data output in here if you're interested:
>> > http://people.redhat.com/jolsa/cc/
>> 
>> I can't download the data output (but the archive is fine).
>> 
>>   $ curl http://people.redhat.com/jolsa/cc/perf.data
>>   <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
>>   <html><head>
>>   <title>403 Forbidden</title>
>>   </head><body>
>>   <h1>Forbidden</h1>
>>   <p>You don't have permission to access /jolsa/cc/perf.data
>>   on this server.</p>
>>   <hr>
>>   <address>Apache Server at people.redhat.com Port 80</address>
>>   </body></html>
>
> oops, should be fixed now.. haven't checked persmissions :-\

I got it now, thanks.

But I couldn't reproduce the segfault (both for TUI and stdio).
Do you have any scenario?

  # ========
  # captured on: Mon Oct 14 17:30:03 2013
  # hostname : dhcp-26-214.brq.redhat.com
  # os release : 3.12.0-rc2+
  # perf version : 3.12.rc2.g9de23a
  # arch : x86_64
  # nrcpus online : 2
  # nrcpus avail : 2
  # cpudesc : Intel(R) Core(TM)2 Duo CPU T9400 @ 2.53GHz
  # cpuid : GenuineIntel,6,23,10
  # total memory : 1968996 kB
  # cmdline : /home/jolsa/linux-perf/tools/perf/perf record -g -e cycles:pp make -j 
  # event : name = cycles:pp, type = 0, config = 0x0, config1 = 0x0, config2 = 0x0, excl_usr = 
  # HEADER_CPU_TOPOLOGY info available, use -I to display
  # pmu mappings: cpu = 4, software = 1, tracepoint = 2, breakpoint = 5
  # ========
  #
  # Samples: 285K of event 'cycles:pp'
  # Event count (approx.): 173578957575
  ...

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 2/8] perf ui/progress: Add new helper functions for progress bar
  2013-10-11  5:15 ` [PATCH 2/8] perf ui/progress: Add new helper functions for progress bar Namhyung Kim
@ 2013-10-21 18:13   ` Arnaldo Carvalho de Melo
  2013-10-22 18:12     ` Namhyung Kim
  0 siblings, 1 reply; 91+ messages in thread
From: Arnaldo Carvalho de Melo @ 2013-10-21 18:13 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Peter Zijlstra, Paul Mackerras, Ingo Molnar, Namhyung Kim, LKML,
	Linus Torvalds, Frederic Weisbecker, Jiri Olsa

Em Fri, Oct 11, 2013 at 02:15:37PM +0900, Namhyung Kim escreveu:
> From: Namhyung Kim <namhyung.kim@lge.com>
> 
> Introduce ui_progress_setup() and ui_progress__advance() to separate
> out from the session logic.  It'll be used by other places in the
> upcoming patch.

Renaming this from 'perf_progress' to 'ui_progress', as the existing
method names implies, and also because this is not perf specific at all
:-)

- Arnaldo
 
> Cc: Jiri Olsa <jolsa@redhat.com>
> Cc: Frederic Weisbecker <fweisbec@gmail.com>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
>  tools/perf/ui/progress.c  | 18 ++++++++++++++++++
>  tools/perf/ui/progress.h  | 10 ++++++++++
>  tools/perf/util/session.c | 24 ++++++++++++------------
>  3 files changed, 40 insertions(+), 12 deletions(-)
> 
> diff --git a/tools/perf/ui/progress.c b/tools/perf/ui/progress.c
> index 3ec695607a4d..b5c22c6b1d93 100644
> --- a/tools/perf/ui/progress.c
> +++ b/tools/perf/ui/progress.c
> @@ -19,6 +19,24 @@ void ui_progress__update(u64 curr, u64 total, const char *title)
>  	return progress_fns->update(curr, total, title);
>  }
>  
> +void ui_progress__setup(struct perf_progress *p, u64 total)
> +{
> +	p->curr = 0;
> +	p->next = p->unit = total / 16;
> +	p->total = total;
> +
> +}
> +
> +void ui_progress__advance(struct perf_progress *p, u64 adv, const char *title)
> +{
> +	p->curr += adv;
> +
> +	if (p->curr >= p->next) {
> +		p->next += p->unit;
> +		ui_progress__update(p->curr, p->total, title);
> +	}
> +}
> +
>  void ui_progress__finish(void)
>  {
>  	if (progress_fns->finish)
> diff --git a/tools/perf/ui/progress.h b/tools/perf/ui/progress.h
> index 257cc224f9cf..3bd23e825953 100644
> --- a/tools/perf/ui/progress.h
> +++ b/tools/perf/ui/progress.h
> @@ -8,10 +8,20 @@ struct ui_progress {
>  	void (*finish)(void);
>  };
>  
> +struct perf_progress {
> +	u64 curr;
> +	u64 next;
> +	u64 unit;
> +	u64 total;
> +};
> +
>  extern struct ui_progress *progress_fns;
>  
>  void ui_progress__init(void);
>  
> +void ui_progress__setup(struct perf_progress *p, u64 total);
> +void ui_progress__advance(struct perf_progress *p, u64 adv, const char *title);
> +
>  void ui_progress__update(u64 curr, u64 total, const char *title);
>  void ui_progress__finish(void);
>  
> diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
> index 6c1d4447c5b4..9f62be8bc167 100644
> --- a/tools/perf/util/session.c
> +++ b/tools/perf/util/session.c
> @@ -538,13 +538,16 @@ static int flush_sample_queue(struct perf_session *s,
>  	struct perf_sample sample;
>  	u64 limit = os->next_flush;
>  	u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
> -	unsigned idx = 0, progress_next = os->nr_samples / 16;
>  	bool show_progress = limit == ULLONG_MAX;
> +	struct perf_progress prog;
>  	int ret;
>  
>  	if (!tool->ordered_samples || !limit)
>  		return 0;
>  
> +	if (show_progress)
> +		ui_progress__setup(&prog, os->nr_samples);
> +
>  	list_for_each_entry_safe(iter, tmp, head, list) {
>  		if (iter->timestamp > limit)
>  			break;
> @@ -562,10 +565,10 @@ static int flush_sample_queue(struct perf_session *s,
>  		os->last_flush = iter->timestamp;
>  		list_del(&iter->list);
>  		list_add(&iter->list, &os->sample_cache);
> -		if (show_progress && (++idx >= progress_next)) {
> -			progress_next += os->nr_samples / 16;
> -			ui_progress__update(idx, os->nr_samples,
> -					    "Processing time ordered events...");
> +
> +		if (show_progress) {
> +			ui_progress__advance(&prog, 1,
> +					     "Processing time ordered events...");
>  		}
>  	}
>  
> @@ -1310,12 +1313,13 @@ int __perf_session__process_events(struct perf_session *session,
>  				   u64 data_offset, u64 data_size,
>  				   u64 file_size, struct perf_tool *tool)
>  {
> -	u64 head, page_offset, file_offset, file_pos, progress_next;
> +	u64 head, page_offset, file_offset, file_pos;
>  	int err, mmap_prot, mmap_flags, map_idx = 0;
>  	size_t	mmap_size;
>  	char *buf, *mmaps[NUM_MMAPS];
>  	union perf_event *event;
>  	uint32_t size;
> +	struct perf_progress prog;
>  
>  	perf_tool__fill_defaults(tool);
>  
> @@ -1326,7 +1330,7 @@ int __perf_session__process_events(struct perf_session *session,
>  	if (data_offset + data_size < file_size)
>  		file_size = data_offset + data_size;
>  
> -	progress_next = file_size / 16;
> +	ui_progress__setup(&prog, file_size);
>  
>  	mmap_size = MMAP_SIZE;
>  	if (mmap_size > file_size)
> @@ -1381,11 +1385,7 @@ more:
>  	head += size;
>  	file_pos += size;
>  
> -	if (file_pos >= progress_next) {
> -		progress_next += file_size / 16;
> -		ui_progress__update(file_pos, file_size,
> -				    "Processing events...");
> -	}
> +	ui_progress__advance(&prog, size, "Processing events...");
>  
>  	if (file_pos < file_size)
>  		goto more;
> -- 
> 1.7.11.7

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 2/8] perf ui/progress: Add new helper functions for progress bar
  2013-10-22 18:12     ` Namhyung Kim
@ 2013-10-22 13:06       ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 91+ messages in thread
From: Arnaldo Carvalho de Melo @ 2013-10-22 13:06 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Peter Zijlstra, Paul Mackerras, Ingo Molnar, Namhyung Kim, LKML,
	Linus Torvalds, Frederic Weisbecker, Jiri Olsa

Em Tue, Oct 22, 2013 at 06:12:40PM +0000, Namhyung Kim escreveu:
> On Mon, 21 Oct 2013 15:13:24 -0300, Arnaldo Carvalho de Melo wrote:
> > Em Fri, Oct 11, 2013 at 02:15:37PM +0900, Namhyung Kim escreveu:
> >> From: Namhyung Kim <namhyung.kim@lge.com>

> >> Introduce ui_progress_setup() and ui_progress__advance() to separate
> >> out from the session logic.  It'll be used by other places in the
> >> upcoming patch.

> > Renaming this from 'perf_progress' to 'ui_progress', as the existing
> > method names implies, and also because this is not perf specific at all
> > :-)

> I'm not sure I understood you correctly.  You're talking about the
> struct perf_progree right?  We already have struct ui_progress for
> saving UI-specific function pointers.  Am I missing something?

Yes, the existing 'struct ui_progress' is not a per instance progress
bar class, but a set of operations that an specific UI sets for use
after some init progress, so it is misnamed, it should be, following
kernel examples, ui_progress_ops, and then ui_progress can be used for
what you called perf_progress.

I'll work on that, enjoy Edinburgh! :-)
 
- Arnaldo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 2/8] perf ui/progress: Add new helper functions for progress bar
  2013-10-21 18:13   ` Arnaldo Carvalho de Melo
@ 2013-10-22 18:12     ` Namhyung Kim
  2013-10-22 13:06       ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 91+ messages in thread
From: Namhyung Kim @ 2013-10-22 18:12 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Peter Zijlstra, Paul Mackerras, Ingo Molnar, Namhyung Kim, LKML,
	Linus Torvalds, Frederic Weisbecker, Jiri Olsa

Hi Arnaldo,

On Mon, 21 Oct 2013 15:13:24 -0300, Arnaldo Carvalho de Melo wrote:
> Em Fri, Oct 11, 2013 at 02:15:37PM +0900, Namhyung Kim escreveu:
>> From: Namhyung Kim <namhyung.kim@lge.com>
>> 
>> Introduce ui_progress_setup() and ui_progress__advance() to separate
>> out from the session logic.  It'll be used by other places in the
>> upcoming patch.
>
> Renaming this from 'perf_progress' to 'ui_progress', as the existing
> method names implies, and also because this is not perf specific at all
> :-)

I'm not sure I understood you correctly.  You're talking about the
struct perf_progree right?  We already have struct ui_progress for
saving UI-specific function pointers.  Am I missing something?

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 91+ messages in thread

* [tip:perf/core] perf callchain: Convert children list to rbtree
  2013-10-11  5:15 ` [PATCH 1/8] perf callchain: Convert children list to rbtree Namhyung Kim
@ 2013-10-23  7:54   ` tip-bot for Namhyung Kim
  2013-10-23 11:07     ` Frederic Weisbecker
  0 siblings, 1 reply; 91+ messages in thread
From: tip-bot for Namhyung Kim @ 2013-10-23  7:54 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: acme, linux-kernel, paulus, hpa, mingo, a.p.zijlstra, torvalds,
	namhyung.kim, namhyung, jolsa, fweisbec, tglx

Commit-ID:  e369517ce5f796945c6af047b4e8b1d650e03458
Gitweb:     http://git.kernel.org/tip/e369517ce5f796945c6af047b4e8b1d650e03458
Author:     Namhyung Kim <namhyung.kim@lge.com>
AuthorDate: Fri, 11 Oct 2013 14:15:36 +0900
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Mon, 21 Oct 2013 17:33:23 -0300

perf callchain: Convert children list to rbtree

Current collapse stage has a scalability problem which can be reproduced
easily with a parallel kernel build.

This is because it needs to traverse every children of callchains
linearly during the collapse/merge stage.

Converting it to a rbtree reduced the overhead significantly.

On my 400MB perf.data file which recorded with make -j32 kernel build:

  $ time perf --no-pager report --stdio > /dev/null

before:
  real	6m22.073s
  user	6m18.683s
  sys	0m0.706s

after:
  real	0m20.780s
  user	0m19.962s
  sys	0m0.689s

During the perf report the overhead on append_chain_children went down
from 96.69% to 18.16%:

  -  18.16%  perf  perf                [.] append_chain_children
     - append_chain_children
        - 77.48% append_chain_children
           + 69.79% merge_chain_branch
           - 22.96% append_chain_children
              + 67.44% merge_chain_branch
              + 30.15% append_chain_children
              + 2.41% callchain_append
           + 7.25% callchain_append
        + 12.26% callchain_append
        + 10.22% merge_chain_branch
  +  11.58%  perf  perf                [.] dso__find_symbol
  +   8.02%  perf  perf                [.] sort__comm_cmp
  +   5.48%  perf  libc-2.17.so        [.] malloc_consolidate

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1381468543-25334-2-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/callchain.c | 147 +++++++++++++++++++++++++++++++++-----------
 tools/perf/util/callchain.h |  11 ++--
 2 files changed, 116 insertions(+), 42 deletions(-)

diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 482f680..e3970e3 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -21,12 +21,6 @@
 
 __thread struct callchain_cursor callchain_cursor;
 
-#define chain_for_each_child(child, parent)	\
-	list_for_each_entry(child, &parent->children, siblings)
-
-#define chain_for_each_child_safe(child, next, parent)	\
-	list_for_each_entry_safe(child, next, &parent->children, siblings)
-
 static void
 rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
 		    enum chain_mode mode)
@@ -71,10 +65,16 @@ static void
 __sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node,
 		  u64 min_hit)
 {
+	struct rb_node *n;
 	struct callchain_node *child;
 
-	chain_for_each_child(child, node)
+	n = rb_first(&node->rb_root_in);
+	while (n) {
+		child = rb_entry(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+
 		__sort_chain_flat(rb_root, child, min_hit);
+	}
 
 	if (node->hit && node->hit >= min_hit)
 		rb_insert_callchain(rb_root, node, CHAIN_FLAT);
@@ -94,11 +94,16 @@ sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root,
 static void __sort_chain_graph_abs(struct callchain_node *node,
 				   u64 min_hit)
 {
+	struct rb_node *n;
 	struct callchain_node *child;
 
 	node->rb_root = RB_ROOT;
+	n = rb_first(&node->rb_root_in);
+
+	while (n) {
+		child = rb_entry(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
 
-	chain_for_each_child(child, node) {
 		__sort_chain_graph_abs(child, min_hit);
 		if (callchain_cumul_hits(child) >= min_hit)
 			rb_insert_callchain(&node->rb_root, child,
@@ -117,13 +122,18 @@ sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root,
 static void __sort_chain_graph_rel(struct callchain_node *node,
 				   double min_percent)
 {
+	struct rb_node *n;
 	struct callchain_node *child;
 	u64 min_hit;
 
 	node->rb_root = RB_ROOT;
 	min_hit = ceil(node->children_hit * min_percent);
 
-	chain_for_each_child(child, node) {
+	n = rb_first(&node->rb_root_in);
+	while (n) {
+		child = rb_entry(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+
 		__sort_chain_graph_rel(child, min_percent);
 		if (callchain_cumul_hits(child) >= min_hit)
 			rb_insert_callchain(&node->rb_root, child,
@@ -173,19 +183,26 @@ create_child(struct callchain_node *parent, bool inherit_children)
 		return NULL;
 	}
 	new->parent = parent;
-	INIT_LIST_HEAD(&new->children);
 	INIT_LIST_HEAD(&new->val);
 
 	if (inherit_children) {
-		struct callchain_node *next;
+		struct rb_node *n;
+		struct callchain_node *child;
+
+		new->rb_root_in = parent->rb_root_in;
+		parent->rb_root_in = RB_ROOT;
 
-		list_splice(&parent->children, &new->children);
-		INIT_LIST_HEAD(&parent->children);
+		n = rb_first(&new->rb_root_in);
+		while (n) {
+			child = rb_entry(n, struct callchain_node, rb_node_in);
+			child->parent = new;
+			n = rb_next(n);
+		}
 
-		chain_for_each_child(next, new)
-			next->parent = new;
+		/* make it the first child */
+		rb_link_node(&new->rb_node_in, NULL, &parent->rb_root_in.rb_node);
+		rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
 	}
-	list_add_tail(&new->siblings, &parent->children);
 
 	return new;
 }
@@ -223,7 +240,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
 	}
 }
 
-static void
+static struct callchain_node *
 add_child(struct callchain_node *parent,
 	  struct callchain_cursor *cursor,
 	  u64 period)
@@ -235,6 +252,19 @@ add_child(struct callchain_node *parent,
 
 	new->children_hit = 0;
 	new->hit = period;
+	return new;
+}
+
+static s64 match_chain(struct callchain_cursor_node *node,
+		      struct callchain_list *cnode)
+{
+	struct symbol *sym = node->sym;
+
+	if (cnode->ms.sym && sym &&
+	    callchain_param.key == CCKEY_FUNCTION)
+		return cnode->ms.sym->start - sym->start;
+	else
+		return cnode->ip - node->ip;
 }
 
 /*
@@ -272,9 +302,33 @@ split_add_child(struct callchain_node *parent,
 
 	/* create a new child for the new branch if any */
 	if (idx_total < cursor->nr) {
+		struct callchain_node *first;
+		struct callchain_list *cnode;
+		struct callchain_cursor_node *node;
+		struct rb_node *p, **pp;
+
 		parent->hit = 0;
-		add_child(parent, cursor, period);
 		parent->children_hit += period;
+
+		node = callchain_cursor_current(cursor);
+		new = add_child(parent, cursor, period);
+
+		/*
+		 * This is second child since we moved parent's children
+		 * to new (first) child above.
+		 */
+		p = parent->rb_root_in.rb_node;
+		first = rb_entry(p, struct callchain_node, rb_node_in);
+		cnode = list_first_entry(&first->val, struct callchain_list,
+					 list);
+
+		if (match_chain(node, cnode) < 0)
+			pp = &p->rb_left;
+		else
+			pp = &p->rb_right;
+
+		rb_link_node(&new->rb_node_in, p, pp);
+		rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
 	} else {
 		parent->hit = period;
 	}
@@ -291,16 +345,40 @@ append_chain_children(struct callchain_node *root,
 		      u64 period)
 {
 	struct callchain_node *rnode;
+	struct callchain_cursor_node *node;
+	struct rb_node **p = &root->rb_root_in.rb_node;
+	struct rb_node *parent = NULL;
+
+	node = callchain_cursor_current(cursor);
+	if (!node)
+		return;
 
 	/* lookup in childrens */
-	chain_for_each_child(rnode, root) {
-		unsigned int ret = append_chain(rnode, cursor, period);
+	while (*p) {
+		s64 ret;
+		struct callchain_list *cnode;
 
-		if (!ret)
+		parent = *p;
+		rnode = rb_entry(parent, struct callchain_node, rb_node_in);
+		cnode = list_first_entry(&rnode->val, struct callchain_list,
+					 list);
+
+		/* just check first entry */
+		ret = match_chain(node, cnode);
+		if (ret == 0) {
+			append_chain(rnode, cursor, period);
 			goto inc_children_hit;
+		}
+
+		if (ret < 0)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
 	}
 	/* nothing in children, add to the current node */
-	add_child(root, cursor, period);
+	rnode = add_child(root, cursor, period);
+	rb_link_node(&rnode->rb_node_in, parent, p);
+	rb_insert_color(&rnode->rb_node_in, &root->rb_root_in);
 
 inc_children_hit:
 	root->children_hit += period;
@@ -325,28 +403,20 @@ append_chain(struct callchain_node *root,
 	 */
 	list_for_each_entry(cnode, &root->val, list) {
 		struct callchain_cursor_node *node;
-		struct symbol *sym;
 
 		node = callchain_cursor_current(cursor);
 		if (!node)
 			break;
 
-		sym = node->sym;
-
-		if (cnode->ms.sym && sym &&
-		    callchain_param.key == CCKEY_FUNCTION) {
-			if (cnode->ms.sym->start != sym->start)
-				break;
-		} else if (cnode->ip != node->ip)
+		if (match_chain(node, cnode) != 0)
 			break;
 
-		if (!found)
-			found = true;
+		found = true;
 
 		callchain_cursor_advance(cursor);
 	}
 
-	/* matches not, relay on the parent */
+	/* matches not, relay no the parent */
 	if (!found) {
 		cursor->curr = curr_snap;
 		cursor->pos = start;
@@ -395,8 +465,9 @@ merge_chain_branch(struct callchain_cursor *cursor,
 		   struct callchain_node *dst, struct callchain_node *src)
 {
 	struct callchain_cursor_node **old_last = cursor->last;
-	struct callchain_node *child, *next_child;
+	struct callchain_node *child;
 	struct callchain_list *list, *next_list;
+	struct rb_node *n;
 	int old_pos = cursor->nr;
 	int err = 0;
 
@@ -412,12 +483,16 @@ merge_chain_branch(struct callchain_cursor *cursor,
 		append_chain_children(dst, cursor, src->hit);
 	}
 
-	chain_for_each_child_safe(child, next_child, src) {
+	n = rb_first(&src->rb_root_in);
+	while (n) {
+		child = container_of(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+		rb_erase(&child->rb_node_in, &src->rb_root_in);
+
 		err = merge_chain_branch(cursor, dst, child);
 		if (err)
 			break;
 
-		list_del(&child->siblings);
 		free(child);
 	}
 
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 2b585bc..7bb3602 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -21,11 +21,11 @@ enum chain_order {
 
 struct callchain_node {
 	struct callchain_node	*parent;
-	struct list_head	siblings;
-	struct list_head	children;
 	struct list_head	val;
-	struct rb_node		rb_node; /* to sort nodes in an rbtree */
-	struct rb_root		rb_root; /* sorted tree of children */
+	struct rb_node		rb_node_in; /* to insert nodes in an rbtree */
+	struct rb_node		rb_node;    /* to sort nodes in an output tree */
+	struct rb_root		rb_root_in; /* input tree of children */
+	struct rb_root		rb_root;    /* sorted output tree of children */
 	unsigned int		val_nr;
 	u64			hit;
 	u64			children_hit;
@@ -86,13 +86,12 @@ extern __thread struct callchain_cursor callchain_cursor;
 
 static inline void callchain_init(struct callchain_root *root)
 {
-	INIT_LIST_HEAD(&root->node.siblings);
-	INIT_LIST_HEAD(&root->node.children);
 	INIT_LIST_HEAD(&root->node.val);
 
 	root->node.parent = NULL;
 	root->node.hit = 0;
 	root->node.children_hit = 0;
+	root->node.rb_root_in = RB_ROOT;
 	root->max_depth = 0;
 }
 

^ permalink raw reply related	[flat|nested] 91+ messages in thread

* Re: [tip:perf/core] perf callchain: Convert children list to rbtree
  2013-10-23  7:54   ` [tip:perf/core] " tip-bot for Namhyung Kim
@ 2013-10-23 11:07     ` Frederic Weisbecker
  2013-10-23 12:45       ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 91+ messages in thread
From: Frederic Weisbecker @ 2013-10-23 11:07 UTC (permalink / raw)
  To: mingo, hpa, paulus, linux-kernel, acme, torvalds, a.p.zijlstra,
	namhyung.kim, namhyung, jolsa, tglx
  Cc: linux-tip-commits

On Wed, Oct 23, 2013 at 12:54:48AM -0700, tip-bot for Namhyung Kim wrote:
> Commit-ID:  e369517ce5f796945c6af047b4e8b1d650e03458
> Gitweb:     http://git.kernel.org/tip/e369517ce5f796945c6af047b4e8b1d650e03458
> Author:     Namhyung Kim <namhyung.kim@lge.com>
> AuthorDate: Fri, 11 Oct 2013 14:15:36 +0900
> Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
> CommitDate: Mon, 21 Oct 2013 17:33:23 -0300
> 
> perf callchain: Convert children list to rbtree
> 
> Current collapse stage has a scalability problem which can be reproduced
> easily with a parallel kernel build.
> 
> This is because it needs to traverse every children of callchains
> linearly during the collapse/merge stage.
> 
> Converting it to a rbtree reduced the overhead significantly.
> 
> On my 400MB perf.data file which recorded with make -j32 kernel build:
> 
>   $ time perf --no-pager report --stdio > /dev/null
> 
> before:
>   real	6m22.073s
>   user	6m18.683s
>   sys	0m0.706s
> 
> after:
>   real	0m20.780s
>   user	0m19.962s
>   sys	0m0.689s
> 
> During the perf report the overhead on append_chain_children went down
> from 96.69% to 18.16%:
> 
>   -  18.16%  perf  perf                [.] append_chain_children
>      - append_chain_children
>         - 77.48% append_chain_children
>            + 69.79% merge_chain_branch
>            - 22.96% append_chain_children
>               + 67.44% merge_chain_branch
>               + 30.15% append_chain_children
>               + 2.41% callchain_append
>            + 7.25% callchain_append
>         + 12.26% callchain_append
>         + 10.22% merge_chain_branch
>   +  11.58%  perf  perf                [.] dso__find_symbol
>   +   8.02%  perf  perf                [.] sort__comm_cmp
>   +   5.48%  perf  libc-2.17.so        [.] malloc_consolidate
> 
> Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> Cc: Frederic Weisbecker <fweisbec@gmail.com>
> Cc: Ingo Molnar <mingo@kernel.org>
> Cc: Jiri Olsa <jolsa@redhat.com>
> Cc: Linus Torvalds <torvalds@linux-foundation.org>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Link: http://lkml.kernel.org/r/1381468543-25334-2-git-send-email-namhyung@kernel.org
> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

I finally found some time to review this, and the patch looks good, thanks a lot Namhyung!

Just a few non critical thoughts:

> ---
>  tools/perf/util/callchain.c | 147 +++++++++++++++++++++++++++++++++-----------
>  tools/perf/util/callchain.h |  11 ++--
>  2 files changed, 116 insertions(+), 42 deletions(-)
> 
> diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
> index 482f680..e3970e3 100644
> --- a/tools/perf/util/callchain.c
> +++ b/tools/perf/util/callchain.c
> @@ -21,12 +21,6 @@
>  
>  __thread struct callchain_cursor callchain_cursor;
>  
> -#define chain_for_each_child(child, parent)	\
> -	list_for_each_entry(child, &parent->children, siblings)
> -
> -#define chain_for_each_child_safe(child, next, parent)	\
> -	list_for_each_entry_safe(child, next, &parent->children, siblings)

We can probably keep these APIs using the rb root iteration code.

There should be an rb_node_for_each() anyway, and rb_node_for_each_entry(). People
refused to have to have such an API because they feared it would encourage blind use
of this O(n log n) iteration. There is plenty of such usecase in practice though, especially
in perf. So I think we can now start to think about it for real.

> -
>  static void
>  rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
>  		    enum chain_mode mode)
> @@ -71,10 +65,16 @@ static void
>  __sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node,
>  		  u64 min_hit)
>  {
> +	struct rb_node *n;
>  	struct callchain_node *child;
>  
> -	chain_for_each_child(child, node)
> +	n = rb_first(&node->rb_root_in);
> +	while (n) {
> +		child = rb_entry(n, struct callchain_node, rb_node_in);
> +		n = rb_next(n);
> +
>  		__sort_chain_flat(rb_root, child, min_hit);
> +	}
>  
>  	if (node->hit && node->hit >= min_hit)
>  		rb_insert_callchain(rb_root, node, CHAIN_FLAT);
> @@ -94,11 +94,16 @@ sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root,
>  static void __sort_chain_graph_abs(struct callchain_node *node,
>  				   u64 min_hit)
>  {
> +	struct rb_node *n;
>  	struct callchain_node *child;
>  
>  	node->rb_root = RB_ROOT;
> +	n = rb_first(&node->rb_root_in);
> +
> +	while (n) {
> +		child = rb_entry(n, struct callchain_node, rb_node_in);
> +		n = rb_next(n);
>  
> -	chain_for_each_child(child, node) {
>  		__sort_chain_graph_abs(child, min_hit);
>  		if (callchain_cumul_hits(child) >= min_hit)
>  			rb_insert_callchain(&node->rb_root, child,
> @@ -117,13 +122,18 @@ sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root,
>  static void __sort_chain_graph_rel(struct callchain_node *node,
>  				   double min_percent)
>  {
> +	struct rb_node *n;
>  	struct callchain_node *child;
>  	u64 min_hit;
>  
>  	node->rb_root = RB_ROOT;
>  	min_hit = ceil(node->children_hit * min_percent);
>  
> -	chain_for_each_child(child, node) {
> +	n = rb_first(&node->rb_root_in);
> +	while (n) {
> +		child = rb_entry(n, struct callchain_node, rb_node_in);
> +		n = rb_next(n);
> +
>  		__sort_chain_graph_rel(child, min_percent);
>  		if (callchain_cumul_hits(child) >= min_hit)
>  			rb_insert_callchain(&node->rb_root, child,
> @@ -173,19 +183,26 @@ create_child(struct callchain_node *parent, bool inherit_children)
>  		return NULL;
>  	}
>  	new->parent = parent;
> -	INIT_LIST_HEAD(&new->children);
>  	INIT_LIST_HEAD(&new->val);
>  
>  	if (inherit_children) {
> -		struct callchain_node *next;
> +		struct rb_node *n;
> +		struct callchain_node *child;
> +
> +		new->rb_root_in = parent->rb_root_in;
> +		parent->rb_root_in = RB_ROOT;
>  
> -		list_splice(&parent->children, &new->children);
> -		INIT_LIST_HEAD(&parent->children);
> +		n = rb_first(&new->rb_root_in);
> +		while (n) {
> +			child = rb_entry(n, struct callchain_node, rb_node_in);
> +			child->parent = new;
> +			n = rb_next(n);
> +		}
>  
> -		chain_for_each_child(next, new)
> -			next->parent = new;
> +		/* make it the first child */
> +		rb_link_node(&new->rb_node_in, NULL, &parent->rb_root_in.rb_node);
> +		rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
>  	}
> -	list_add_tail(&new->siblings, &parent->children);
>  
>  	return new;
>  }
> @@ -223,7 +240,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
>  	}
>  }
>  
> -static void
> +static struct callchain_node *
>  add_child(struct callchain_node *parent,
>  	  struct callchain_cursor *cursor,
>  	  u64 period)
> @@ -235,6 +252,19 @@ add_child(struct callchain_node *parent,
>  
>  	new->children_hit = 0;
>  	new->hit = period;
> +	return new;
> +}
> +
> +static s64 match_chain(struct callchain_cursor_node *node,
> +		      struct callchain_list *cnode)
> +{
> +	struct symbol *sym = node->sym;
> +
> +	if (cnode->ms.sym && sym &&
> +	    callchain_param.key == CCKEY_FUNCTION)
> +		return cnode->ms.sym->start - sym->start;
> +	else
> +		return cnode->ip - node->ip;
>  }
>  
>  /*
> @@ -272,9 +302,33 @@ split_add_child(struct callchain_node *parent,
>  
>  	/* create a new child for the new branch if any */
>  	if (idx_total < cursor->nr) {
> +		struct callchain_node *first;
> +		struct callchain_list *cnode;
> +		struct callchain_cursor_node *node;
> +		struct rb_node *p, **pp;
> +
>  		parent->hit = 0;
> -		add_child(parent, cursor, period);
>  		parent->children_hit += period;
> +
> +		node = callchain_cursor_current(cursor);
> +		new = add_child(parent, cursor, period);
> +
> +		/*
> +		 * This is second child since we moved parent's children
> +		 * to new (first) child above.
> +		 */
> +		p = parent->rb_root_in.rb_node;
> +		first = rb_entry(p, struct callchain_node, rb_node_in);
> +		cnode = list_first_entry(&first->val, struct callchain_list,
> +					 list);
> +
> +		if (match_chain(node, cnode) < 0)
> +			pp = &p->rb_left;
> +		else
> +			pp = &p->rb_right;
> +
> +		rb_link_node(&new->rb_node_in, p, pp);
> +		rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
>  	} else {
>  		parent->hit = period;
>  	}
> @@ -291,16 +345,40 @@ append_chain_children(struct callchain_node *root,
>  		      u64 period)
>  {
>  	struct callchain_node *rnode;
> +	struct callchain_cursor_node *node;
> +	struct rb_node **p = &root->rb_root_in.rb_node;
> +	struct rb_node *parent = NULL;
> +
> +	node = callchain_cursor_current(cursor);
> +	if (!node)
> +		return;
>  
>  	/* lookup in childrens */
> -	chain_for_each_child(rnode, root) {
> -		unsigned int ret = append_chain(rnode, cursor, period);
> +	while (*p) {
> +		s64 ret;
> +		struct callchain_list *cnode;
>  
> -		if (!ret)
> +		parent = *p;
> +		rnode = rb_entry(parent, struct callchain_node, rb_node_in);
> +		cnode = list_first_entry(&rnode->val, struct callchain_list,
> +					 list);
> +
> +		/* just check first entry */
> +		ret = match_chain(node, cnode);
> +		if (ret == 0) {
> +			append_chain(rnode, cursor, period);

Then append_chain() probably deserve a cleanup. In fact it shouldn't
anymore have code to handle cases where there is no match.

I'm adding that to my TODO-list.

>  			goto inc_children_hit;
> +		}
> +
> +		if (ret < 0)
> +			p = &parent->rb_left;
> +		else
> +			p = &parent->rb_right;
>  	}
>  	/* nothing in children, add to the current node */
> -	add_child(root, cursor, period);
> +	rnode = add_child(root, cursor, period);
> +	rb_link_node(&rnode->rb_node_in, parent, p);
> +	rb_insert_color(&rnode->rb_node_in, &root->rb_root_in);
>  
>  inc_children_hit:
>  	root->children_hit += period;
> @@ -325,28 +403,20 @@ append_chain(struct callchain_node *root,
>  	 */
>  	list_for_each_entry(cnode, &root->val, list) {
>  		struct callchain_cursor_node *node;
> -		struct symbol *sym;
>  
>  		node = callchain_cursor_current(cursor);
>  		if (!node)
>  			break;
>  
> -		sym = node->sym;
> -
> -		if (cnode->ms.sym && sym &&
> -		    callchain_param.key == CCKEY_FUNCTION) {
> -			if (cnode->ms.sym->start != sym->start)
> -				break;
> -		} else if (cnode->ip != node->ip)
> +		if (match_chain(node, cnode) != 0)
>  			break;
>  
> -		if (!found)
> -			found = true;
> +		found = true;
>  
>  		callchain_cursor_advance(cursor);
>  	}
>  
> -	/* matches not, relay on the parent */
> +	/* matches not, relay no the parent */

Probably some mistake?

>  	if (!found) {
>  		cursor->curr = curr_snap;
>  		cursor->pos = start;
> @@ -395,8 +465,9 @@ merge_chain_branch(struct callchain_cursor *cursor,
>  		   struct callchain_node *dst, struct callchain_node *src)
>  {
>  	struct callchain_cursor_node **old_last = cursor->last;
> -	struct callchain_node *child, *next_child;
> +	struct callchain_node *child;
>  	struct callchain_list *list, *next_list;
> +	struct rb_node *n;
>  	int old_pos = cursor->nr;
>  	int err = 0;
>  
> @@ -412,12 +483,16 @@ merge_chain_branch(struct callchain_cursor *cursor,
>  		append_chain_children(dst, cursor, src->hit);
>  	}
>  
> -	chain_for_each_child_safe(child, next_child, src) {
> +	n = rb_first(&src->rb_root_in);
> +	while (n) {
> +		child = container_of(n, struct callchain_node, rb_node_in);
> +		n = rb_next(n);
> +		rb_erase(&child->rb_node_in, &src->rb_root_in);

I'm not sure if it works that way. Maybe. Or maybe we can miss some entries
if we erase the parent and then we look at the next entry after the child.

For example:

       A
      / \
     B   C

All have the same weight. Then we can have:

n = first(); // n = A
child = next(n); // child = B


      C
      |
      B

remove(n) // remove A
n = child; // n = B
child = next(n); // child = NULL

Ok this probably can't happen. I guess we are supposed to end up with:

      B
      |
      C

Still the removal of parents behind a walk through children is scary. Maybe that works
but it's scary.

I'd rather have:

while ((entry = rb_first(root)) != NULL)
    rb_erase(root, entry);

> +
>  		err = merge_chain_branch(cursor, dst, child);
>  		if (err)
>  			break;
>  
> -		list_del(&child->siblings);
>  		free(child);
>  	}
>  
> diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
> index 2b585bc..7bb3602 100644
> --- a/tools/perf/util/callchain.h
> +++ b/tools/perf/util/callchain.h
> @@ -21,11 +21,11 @@ enum chain_order {
>  
>  struct callchain_node {
>  	struct callchain_node	*parent;
> -	struct list_head	siblings;
> -	struct list_head	children;
>  	struct list_head	val;
> -	struct rb_node		rb_node; /* to sort nodes in an rbtree */
> -	struct rb_root		rb_root; /* sorted tree of children */

This needs some more details I think. Also following this logic, rb_root should be rb_root_out.

> +	struct rb_node		rb_node_in; /* to insert nodes in an rbtree */
> +	struct rb_node		rb_node;    /* to sort nodes in an output tree */
> +	struct rb_root		rb_root_in; /* input tree of children */
> +	struct rb_root		rb_root;    /* sorted output tree of children */
>  	unsigned int		val_nr;
>  	u64			hit;
>  	u64			children_hit;
> @@ -86,13 +86,12 @@ extern __thread struct callchain_cursor callchain_cursor;
>  
>  static inline void callchain_init(struct callchain_root *root)
>  {
> -	INIT_LIST_HEAD(&root->node.siblings);
> -	INIT_LIST_HEAD(&root->node.children);
>  	INIT_LIST_HEAD(&root->node.val);
>  
>  	root->node.parent = NULL;
>  	root->node.hit = 0;
>  	root->node.children_hit = 0;
> +	root->node.rb_root_in = RB_ROOT;
>  	root->max_depth = 0;
>  }
>  

Ok other than that it's all good. I'm adding all these details in my TODO list and will send some
patches, thanks again for this optimization!

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [tip:perf/core] perf callchain: Convert children list to rbtree
  2013-10-23 11:07     ` Frederic Weisbecker
@ 2013-10-23 12:45       ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 91+ messages in thread
From: Arnaldo Carvalho de Melo @ 2013-10-23 12:45 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: mingo, hpa, paulus, linux-kernel, torvalds, a.p.zijlstra,
	namhyung.kim, namhyung, jolsa, tglx, linux-tip-commits

Em Wed, Oct 23, 2013 at 12:07:26PM +0100, Frederic Weisbecker escreveu:
> On Wed, Oct 23, 2013 at 12:54:48AM -0700, tip-bot for Namhyung Kim wrote:
> > -#define chain_for_each_child(child, parent)	\
> > -	list_for_each_entry(child, &parent->children, siblings)
> > -
> > -#define chain_for_each_child_safe(child, next, parent)	\
> > -	list_for_each_entry_safe(child, next, &parent->children, siblings)
> 
> We can probably keep these APIs using the rb root iteration code.
> 
> There should be an rb_node_for_each() anyway, and rb_node_for_each_entry(). People
> refused to have to have such an API because they feared it would encourage blind use
> of this O(n log n) iteration. There is plenty of such usecase in practice though, especially
> in perf. So I think we can now start to think about it for real.

I tried introducing one, PeterZ objected, on the grounds you mentioned,
and I still think they are needed, agree with you. Just put a:

/*
   DANGER WILL ROBINSON DANGER

   You should know what you're doing, PeterZ is watching you!

   I CAN NOT ACCEPT THAT PERSON ACTION
*/

or something like that above such helpers ;-)

- Arnaldo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* [tip:perf/core] perf tools: Show progress on histogram collapsing
  2013-10-11  5:15 ` [PATCH 3/8] perf tools: Show progress on histogram collapsing Namhyung Kim
@ 2013-10-25 10:33   ` tip-bot for Namhyung Kim
  2013-10-28 15:03   ` [tip:perf/urgent] perf script python: Fix mem leak due to missing Py_DECREFs on dict entries tip-bot for Joseph Schuchart
  1 sibling, 0 replies; 91+ messages in thread
From: tip-bot for Namhyung Kim @ 2013-10-25 10:33 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: acme, linux-kernel, paulus, hpa, mingo, a.p.zijlstra, torvalds,
	namhyung.kim, namhyung, jolsa, fweisbec, tglx

Commit-ID:  c1fb5651bb40f9efaf32d280f39e06df7e352673
Gitweb:     http://git.kernel.org/tip/c1fb5651bb40f9efaf32d280f39e06df7e352673
Author:     Namhyung Kim <namhyung.kim@lge.com>
AuthorDate: Fri, 11 Oct 2013 14:15:38 +0900
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Wed, 23 Oct 2013 15:48:24 -0300

perf tools: Show progress on histogram collapsing

It can take quite amount of time so add progress bar UI to inform user.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1381468543-25334-4-git-send-email-namhyung@kernel.org
[ perf_progress -> ui_progress ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-annotate.c |  2 +-
 tools/perf/builtin-diff.c     |  2 +-
 tools/perf/builtin-report.c   | 10 +++++++++-
 tools/perf/builtin-top.c      |  4 ++--
 tools/perf/tests/hists_link.c |  2 +-
 tools/perf/util/hist.c        |  5 ++++-
 tools/perf/util/hist.h        |  3 ++-
 7 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 17c6b49..6c5ae57 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -247,7 +247,7 @@ static int __cmd_annotate(struct perf_annotate *ann)
 
 		if (nr_samples > 0) {
 			total_nr_samples += nr_samples;
-			hists__collapse_resort(hists);
+			hists__collapse_resort(hists, NULL);
 			hists__output_resort(hists);
 
 			if (symbol_conf.event_group &&
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 9c82888..b605009 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -369,7 +369,7 @@ static void perf_evlist__collapse_resort(struct perf_evlist *evlist)
 	list_for_each_entry(evsel, &evlist->entries, node) {
 		struct hists *hists = &evsel->hists;
 
-		hists__collapse_resort(hists);
+		hists__collapse_resort(hists, NULL);
 	}
 }
 
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index e3598a4..98d3891 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -496,6 +496,7 @@ static int __cmd_report(struct perf_report *rep)
 	struct map *kernel_map;
 	struct kmap *kernel_kmap;
 	const char *help = "For a higher level overview, try: perf report --sort comm,dso";
+	struct ui_progress prog;
 	struct perf_data_file *file = session->file;
 
 	signal(SIGINT, sig_handler);
@@ -558,13 +559,19 @@ static int __cmd_report(struct perf_report *rep)
 	}
 
 	nr_samples = 0;
+	list_for_each_entry(pos, &session->evlist->entries, node)
+		nr_samples += pos->hists.nr_entries;
+
+	ui_progress__init(&prog, nr_samples, "Merging related events...");
+
+	nr_samples = 0;
 	list_for_each_entry(pos, &session->evlist->entries, node) {
 		struct hists *hists = &pos->hists;
 
 		if (pos->idx == 0)
 			hists->symbol_filter_str = rep->symbol_filter_str;
 
-		hists__collapse_resort(hists);
+		hists__collapse_resort(hists, &prog);
 		nr_samples += hists->stats.nr_events[PERF_RECORD_SAMPLE];
 
 		/* Non-group events are considered as leader */
@@ -576,6 +583,7 @@ static int __cmd_report(struct perf_report *rep)
 			hists__link(leader_hists, hists);
 		}
 	}
+	ui_progress__finish();
 
 	if (session_done())
 		return 0;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 386d833..76c9264 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -286,7 +286,7 @@ static void perf_top__print_sym_table(struct perf_top *top)
 		return;
 	}
 
-	hists__collapse_resort(&top->sym_evsel->hists);
+	hists__collapse_resort(&top->sym_evsel->hists, NULL);
 	hists__output_resort(&top->sym_evsel->hists);
 	hists__decay_entries(&top->sym_evsel->hists,
 			     top->hide_user_symbols,
@@ -552,7 +552,7 @@ static void perf_top__sort_new_samples(void *arg)
 	if (t->evlist->selected != NULL)
 		t->sym_evsel = t->evlist->selected;
 
-	hists__collapse_resort(&t->sym_evsel->hists);
+	hists__collapse_resort(&t->sym_evsel->hists, NULL);
 	hists__output_resort(&t->sym_evsel->hists);
 	hists__decay_entries(&t->sym_evsel->hists,
 			     t->hide_user_symbols,
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 025503a..b51abcb 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -467,7 +467,7 @@ int test__hists_link(void)
 		goto out;
 
 	list_for_each_entry(evsel, &evlist->entries, node) {
-		hists__collapse_resort(&evsel->hists);
+		hists__collapse_resort(&evsel->hists, NULL);
 
 		if (verbose > 2)
 			print_hists(&evsel->hists);
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index f0b863e..7e80253 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -399,6 +399,7 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
 	if (!he)
 		return NULL;
 
+	hists->nr_entries++;
 	rb_link_node(&he->rb_node_in, parent, p);
 	rb_insert_color(&he->rb_node_in, hists->entries_in);
 out:
@@ -604,7 +605,7 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he)
 	hists__filter_entry_by_symbol(hists, he);
 }
 
-void hists__collapse_resort(struct hists *hists)
+void hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
 {
 	struct rb_root *root;
 	struct rb_node *next;
@@ -631,6 +632,8 @@ void hists__collapse_resort(struct hists *hists)
 			 */
 			hists__apply_filters(hists, n);
 		}
+		if (prog)
+			ui_progress__update(prog, 1);
 	}
 }
 
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 20b1758..0c7ce8b 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -5,6 +5,7 @@
 #include <pthread.h>
 #include "callchain.h"
 #include "header.h"
+#include "ui/progress.h"
 
 extern struct callchain_param callchain_param;
 
@@ -108,7 +109,7 @@ struct hist_entry *__hists__add_mem_entry(struct hists *self,
 					  u64 weight);
 
 void hists__output_resort(struct hists *self);
-void hists__collapse_resort(struct hists *self);
+void hists__collapse_resort(struct hists *self, struct ui_progress *prog);
 
 void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel);
 void hists__output_recalc_col_len(struct hists *hists, int max_rows);

^ permalink raw reply related	[flat|nested] 91+ messages in thread

* Re: [PATCH 6/8] perf tools: Add new comm infrastructure
  2013-10-11  5:15 ` [PATCH 6/8] perf tools: Add new comm infrastructure Namhyung Kim
@ 2013-10-25 10:56   ` Frederic Weisbecker
  2013-10-25 13:04     ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 91+ messages in thread
From: Frederic Weisbecker @ 2013-10-25 10:56 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds, Jiri Olsa,
	David Ahern, Ingo Molnar, Arnaldo Carvalho de Melo,
	Stephane Eranian

2013/10/11 Namhyung Kim <namhyung@kernel.org>:
> From: Frederic Weisbecker <fweisbec@gmail.com>
>
> This new comm infrastructure provides two features:
>
> 1) It keeps track of all comms lifecycle for a given thread. This
> way we can associate a timeframe to any thread comm, as long as
> PERF_SAMPLE_TIME samples are joined to comm and fork events.
>
> As a result we should have more precise comm sorted hists with
> seperated entries for pre and post exec time after a fork.
>
> 2) It also makes sure that a given comm string is not duplicated
> but rather shared among the threads that refer to it. This way the
> threads comm can be compared against pointer values from the sort
> infrastructure.
>
> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
> Cc: Jiri Olsa <jolsa@redhat.com>
> Cc: David Ahern <dsahern@gmail.com>
> Cc: Ingo Molnar <mingo@elte.hu>
> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
> Cc: Stephane Eranian <eranian@google.com>
> Link: http://lkml.kernel.org/n/tip-hwjf70b2wve9m2kosxiq8bb3@git.kernel.org
> [ Rename some accessor functions ]
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> [ Fixed up minor const pointer issues and removed 'self' usage ]
> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

Hi,

I was wondering about the fate of these patches. I can resend these or
do any rebase if you need to.

Thanks.

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 6/8] perf tools: Add new comm infrastructure
  2013-10-25 10:56   ` Frederic Weisbecker
@ 2013-10-25 13:04     ` Arnaldo Carvalho de Melo
  2013-10-25 15:33       ` David Ahern
  0 siblings, 1 reply; 91+ messages in thread
From: Arnaldo Carvalho de Melo @ 2013-10-25 13:04 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Namhyung Kim, Peter Zijlstra, Paul Mackerras, Ingo Molnar,
	Namhyung Kim, LKML, Linus Torvalds, Jiri Olsa, David Ahern,
	Ingo Molnar, Stephane Eranian

Em Fri, Oct 25, 2013 at 11:56:31AM +0100, Frederic Weisbecker escreveu:
> 2013/10/11 Namhyung Kim <namhyung@kernel.org>:
> > From: Frederic Weisbecker <fweisbec@gmail.com>

> > This new comm infrastructure provides two features:

> I was wondering about the fate of these patches. I can resend these or
> do any rebase if you need to.

So I applied the first few patches, but deferred working on this part,
the comm one, because Jiri Olsa had some problems with it, IIRC, Jiri?
Can you refresh my mind and ellaborate on it?

Thanks!

- Arnaldo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 6/8] perf tools: Add new comm infrastructure
  2013-10-25 13:04     ` Arnaldo Carvalho de Melo
@ 2013-10-25 15:33       ` David Ahern
  2013-10-25 18:12         ` Frederic Weisbecker
  0 siblings, 1 reply; 91+ messages in thread
From: David Ahern @ 2013-10-25 15:33 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Frederic Weisbecker
  Cc: Namhyung Kim, Peter Zijlstra, Paul Mackerras, Ingo Molnar,
	Namhyung Kim, LKML, Linus Torvalds, Jiri Olsa, Ingo Molnar,
	Stephane Eranian

On 10/25/13 7:04 AM, Arnaldo Carvalho de Melo wrote:
> Em Fri, Oct 25, 2013 at 11:56:31AM +0100, Frederic Weisbecker escreveu:
>> 2013/10/11 Namhyung Kim <namhyung@kernel.org>:
>>> From: Frederic Weisbecker <fweisbec@gmail.com>
>
>>> This new comm infrastructure provides two features:
>
>> I was wondering about the fate of these patches. I can resend these or
>> do any rebase if you need to.
>
> So I applied the first few patches, but deferred working on this part,
> the comm one, because Jiri Olsa had some problems with it, IIRC, Jiri?
> Can you refresh my mind and ellaborate on it?

I also saw some comm related problems with Namhyung's tree. Not sure if 
it contained all of Frederic's patches. I have not had time to chase 
down what is going on -- just that comm's coming out of perf-script were 
not correct.

David


^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 6/8] perf tools: Add new comm infrastructure
  2013-10-25 15:33       ` David Ahern
@ 2013-10-25 18:12         ` Frederic Weisbecker
  2013-10-25 18:14           ` Arnaldo Carvalho de Melo
  2013-10-25 18:19           ` David Ahern
  0 siblings, 2 replies; 91+ messages in thread
From: Frederic Weisbecker @ 2013-10-25 18:12 UTC (permalink / raw)
  To: David Ahern
  Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Peter Zijlstra,
	Paul Mackerras, Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds,
	Jiri Olsa, Ingo Molnar, Stephane Eranian

2013/10/25 David Ahern <dsahern@gmail.com>:
> On 10/25/13 7:04 AM, Arnaldo Carvalho de Melo wrote:
>>
>> Em Fri, Oct 25, 2013 at 11:56:31AM +0100, Frederic Weisbecker escreveu:
>>>
>>> 2013/10/11 Namhyung Kim <namhyung@kernel.org>:
>>>>
>>>> From: Frederic Weisbecker <fweisbec@gmail.com>
>>
>>
>>>> This new comm infrastructure provides two features:
>>
>>
>>> I was wondering about the fate of these patches. I can resend these or
>>> do any rebase if you need to.
>>
>>
>> So I applied the first few patches, but deferred working on this part,
>> the comm one, because Jiri Olsa had some problems with it, IIRC, Jiri?
>> Can you refresh my mind and ellaborate on it?
>
>
> I also saw some comm related problems with Namhyung's tree. Not sure if it
> contained all of Frederic's patches. I have not had time to chase down what
> is going on -- just that comm's coming out of perf-script were not correct.

Oh I see. It's possible that my massive conversion to use the comm
accessor got blind at some point and left over a few things. I
remember that I only lightly tested that new comm infrastructure. I
mean I tested a lot of "perf report -s foo,bar" combinations for
performance comparisons but I haven't tested the perf script and all
the other perf tools.

I'll rebase these patches and test that wider before resending.

Thanks!

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 6/8] perf tools: Add new comm infrastructure
  2013-10-25 18:12         ` Frederic Weisbecker
@ 2013-10-25 18:14           ` Arnaldo Carvalho de Melo
  2013-10-25 18:19           ` David Ahern
  1 sibling, 0 replies; 91+ messages in thread
From: Arnaldo Carvalho de Melo @ 2013-10-25 18:14 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: David Ahern, Namhyung Kim, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds, Jiri Olsa,
	Ingo Molnar, Stephane Eranian

Em Fri, Oct 25, 2013 at 07:12:36PM +0100, Frederic Weisbecker escreveu:
> Oh I see. It's possible that my massive conversion to use the comm
> accessor got blind at some point and left over a few things. I
> remember that I only lightly tested that new comm infrastructure. I
> mean I tested a lot of "perf report -s foo,bar" combinations for
> performance comparisons but I haven't tested the perf script and all
> the other perf tools.
> 
> I'll rebase these patches and test that wider before resending.

Thanks for doing that!

- Arnaldo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 6/8] perf tools: Add new comm infrastructure
  2013-10-25 18:12         ` Frederic Weisbecker
  2013-10-25 18:14           ` Arnaldo Carvalho de Melo
@ 2013-10-25 18:19           ` David Ahern
  2013-10-28  5:38             ` Namhyung Kim
  1 sibling, 1 reply; 91+ messages in thread
From: David Ahern @ 2013-10-25 18:19 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Peter Zijlstra,
	Paul Mackerras, Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds,
	Jiri Olsa, Ingo Molnar, Stephane Eranian

On 10/25/13 12:12 PM, Frederic Weisbecker wrote:
> Oh I see. It's possible that my massive conversion to use the comm
> accessor got blind at some point and left over a few things. I
> remember that I only lightly tested that new comm infrastructure. I
> mean I tested a lot of "perf report -s foo,bar" combinations for
> performance comparisons but I haven't tested the perf script and all
> the other perf tools.
>
> I'll rebase these patches and test that wider before resending.

specifically, I see stuff like perf forking ls and comm still shows as 
perf even though there is COMM record with the rename to ls. I believe 
the test case was something like:

perf sched record -- ls
perf script

David


^ permalink raw reply	[flat|nested] 91+ messages in thread

* RFP: Fixing "-ga -ag -g fp -g dwarf" was Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-13  5:23             ` Ingo Molnar
@ 2013-10-25 19:09               ` Arnaldo Carvalho de Melo
  2013-10-25 19:22                 ` David Ahern
  0 siblings, 1 reply; 91+ messages in thread
From: Arnaldo Carvalho de Melo @ 2013-10-25 19:09 UTC (permalink / raw)
  To: David Ahern
  Cc: Ingo Molnar, Jiri Olsa, Namhyung Kim, Peter Zijlstra,
	Paul Mackerras, Namhyung Kim, LKML, Linus Torvalds,
	Frederic Weisbecker

Em Sun, Oct 13, 2013 at 07:23:28AM +0200, Ingo Molnar escreveu:
> * David Ahern <dsahern@gmail.com> wrote:
> > On 10/12/13 10:53 AM, Ingo Molnar wrote:
> > >* David Ahern <dsahern@gmail.com> wrote:
> > >>On 10/11/13 9:11 AM, David Ahern wrote:
> > >>>It would be nice to fix the callchain arg handler to not attempt to
> > >>>process the next argument if it is not fp or dwarf.

> > >>Specifically, something like this which maintains syntax and default
> > >>fp option:

> > >Lets not maintain a broken syntax please - breaking the very simple -ga
> > >was the primary regression to begin with, so lets fix that.

> > I think I did with the second follow up patch: -ga -ag -g fp -g
> > dwarf should all work properly with fp the default for -g.

> Acked-by: Ingo Molnar <mingo@kernel.org>

Can I have this one submitted?

I guess I found it but it was malformed, didn't apply.

- Arnaldo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: RFP: Fixing "-ga -ag -g fp -g dwarf" was Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-25 19:09               ` RFP: Fixing "-ga -ag -g fp -g dwarf" was " Arnaldo Carvalho de Melo
@ 2013-10-25 19:22                 ` David Ahern
  2013-10-25 19:46                   ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 91+ messages in thread
From: David Ahern @ 2013-10-25 19:22 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Ingo Molnar, Jiri Olsa, Namhyung Kim, Peter Zijlstra,
	Paul Mackerras, Namhyung Kim, LKML, Linus Torvalds,
	Frederic Weisbecker

On 10/25/13 1:09 PM, Arnaldo Carvalho de Melo wrote:
>>> I think I did with the second follow up patch: -ga -ag -g fp -g
>>> dwarf should all work properly with fp the default for -g.
>
>> Acked-by: Ingo Molnar <mingo@kernel.org>
>
> Can I have this one submitted?
>
> I guess I found it but it was malformed, didn't apply.

Upon further review, Jiri was correct: that patch handles some of the 
old cases fine, but did not handle others. ie., it just moved the bad 
syntax problem around.

Looks like the parse-options code does not handle optional arguments. 
e.g., -g defaults to 'fp' if no argument is given. With the following 
permutations:
-gfoo
-g foo
-g -- foo

the parsing code gets confused on what 'foo' is. It needs some logic for 
callbacks to say 'nope, not my argument' and the option parsing checks 
for an alternative interpretation (e.g., "-gfoo" ?= "-f -o -o" or in the 
case of '-g foo' it means -g is the end of the arguments and foo is the 
first one not processed -- ie., the workload to run).

David


^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: RFP: Fixing "-ga -ag -g fp -g dwarf" was Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-25 19:22                 ` David Ahern
@ 2013-10-25 19:46                   ` Arnaldo Carvalho de Melo
  2013-10-26 12:03                     ` Ingo Molnar
  0 siblings, 1 reply; 91+ messages in thread
From: Arnaldo Carvalho de Melo @ 2013-10-25 19:46 UTC (permalink / raw)
  To: David Ahern
  Cc: Ingo Molnar, Jiri Olsa, Namhyung Kim, Peter Zijlstra,
	Paul Mackerras, Namhyung Kim, LKML, Linus Torvalds,
	Frederic Weisbecker

Em Fri, Oct 25, 2013 at 01:22:27PM -0600, David Ahern escreveu:
> On 10/25/13 1:09 PM, Arnaldo Carvalho de Melo wrote:
> >>>I think I did with the second follow up patch: -ga -ag -g fp -g
> >>>dwarf should all work properly with fp the default for -g.

> >>Acked-by: Ingo Molnar <mingo@kernel.org>

> >Can I have this one submitted?
 
> Upon further review, Jiri was correct: that patch handles some of
> the old cases fine, but did not handle others. ie., it just moved
> the bad syntax problem around.

perhaps we can do, at least for now, with what Ingo suggested?

Namely, having:

--call-graph   Require an argument, either "dwarf" or "fp"
-g             Doesn't require anything, uses whatever is configured,
               fp if no explicit config is done in places like
               ~/.perfconfig

Fits with what most people do usually, no?

- Arnaldo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: RFP: Fixing "-ga -ag -g fp -g dwarf" was Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-25 19:46                   ` Arnaldo Carvalho de Melo
@ 2013-10-26 12:03                     ` Ingo Molnar
  2013-10-26 12:35                       ` Jiri Olsa
  2013-10-26 14:25                       ` [PATCH 0/4] perf tools: Fix -g option handling Jiri Olsa
  0 siblings, 2 replies; 91+ messages in thread
From: Ingo Molnar @ 2013-10-26 12:03 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: David Ahern, Jiri Olsa, Namhyung Kim, Peter Zijlstra,
	Paul Mackerras, Namhyung Kim, LKML, Linus Torvalds,
	Frederic Weisbecker


* Arnaldo Carvalho de Melo <acme@ghostprotocols.net> wrote:

> Em Fri, Oct 25, 2013 at 01:22:27PM -0600, David Ahern escreveu:
> > On 10/25/13 1:09 PM, Arnaldo Carvalho de Melo wrote:
> > >>>I think I did with the second follow up patch: -ga -ag -g fp -g
> > >>>dwarf should all work properly with fp the default for -g.
> 
> > >>Acked-by: Ingo Molnar <mingo@kernel.org>
> 
> > >Can I have this one submitted?
>  
> > Upon further review, Jiri was correct: that patch handles some of
> > the old cases fine, but did not handle others. ie., it just moved
> > the bad syntax problem around.
> 
> perhaps we can do, at least for now, with what Ingo suggested?
> 
> Namely, having:
> 
> --call-graph   Require an argument, either "dwarf" or "fp"
> -g             Doesn't require anything, uses whatever is configured,
>                fp if no explicit config is done in places like
>                ~/.perfconfig
> 
> Fits with what most people do usually, no?

Please do this! Usability sucks right now, going from '-g' to
'-g <foo>' was an incompatible change, a regression I argue,
which should be fixed in an urgent branch ASAP.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: RFP: Fixing "-ga -ag -g fp -g dwarf" was Re: [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5)
  2013-10-26 12:03                     ` Ingo Molnar
@ 2013-10-26 12:35                       ` Jiri Olsa
  2013-10-26 14:25                       ` [PATCH 0/4] perf tools: Fix -g option handling Jiri Olsa
  1 sibling, 0 replies; 91+ messages in thread
From: Jiri Olsa @ 2013-10-26 12:35 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Arnaldo Carvalho de Melo, David Ahern, Namhyung Kim,
	Peter Zijlstra, Paul Mackerras, Namhyung Kim, LKML,
	Linus Torvalds, Frederic Weisbecker

On Sat, Oct 26, 2013 at 02:03:36PM +0200, Ingo Molnar wrote:
> 
> * Arnaldo Carvalho de Melo <acme@ghostprotocols.net> wrote:
> 
> > Em Fri, Oct 25, 2013 at 01:22:27PM -0600, David Ahern escreveu:
> > > On 10/25/13 1:09 PM, Arnaldo Carvalho de Melo wrote:
> > > >>>I think I did with the second follow up patch: -ga -ag -g fp -g
> > > >>>dwarf should all work properly with fp the default for -g.
> > 
> > > >>Acked-by: Ingo Molnar <mingo@kernel.org>
> > 
> > > >Can I have this one submitted?
> >  
> > > Upon further review, Jiri was correct: that patch handles some of
> > > the old cases fine, but did not handle others. ie., it just moved
> > > the bad syntax problem around.
> > 
> > perhaps we can do, at least for now, with what Ingo suggested?
> > 
> > Namely, having:
> > 
> > --call-graph   Require an argument, either "dwarf" or "fp"
> > -g             Doesn't require anything, uses whatever is configured,
> >                fp if no explicit config is done in places like
> >                ~/.perfconfig
> > 
> > Fits with what most people do usually, no?
> 
> Please do this! Usability sucks right now, going from '-g' to
> '-g <foo>' was an incompatible change, a regression I argue,
> which should be fixed in an urgent branch ASAP.

about to finish patches with that, will send it soon

jirka

^ permalink raw reply	[flat|nested] 91+ messages in thread

* [PATCH 0/4] perf tools: Fix -g option handling
  2013-10-26 12:03                     ` Ingo Molnar
  2013-10-26 12:35                       ` Jiri Olsa
@ 2013-10-26 14:25                       ` Jiri Olsa
  2013-10-26 14:25                         ` [PATCH 1/4] perf tools: Split -g and --call-graph for record command Jiri Olsa
                                           ` (4 more replies)
  1 sibling, 5 replies; 91+ messages in thread
From: Jiri Olsa @ 2013-10-26 14:25 UTC (permalink / raw)
  To: linux-kernel
  Cc: Corey Ashford, David Ahern, Ingo Molnar, Namhyung Kim,
	Paul Mackerras, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Andi Kleen, Adrian Hunter

hi,
changing the '-g/-G' options for record/top commands
to take NO argument and enable unwind method based
on .perfconfig setup (using FP by default).

The current -g option parsing moves into the
'--call-graph' option.

Examples:
  $ cat ~/.perfconfig:
    [top]
          call-graph = fp

    [record]
          call-graph = dwarf,8192

  $ perf record -g ls
  - enables dwarf unwind with user stack size dump 8192 bytes

  $ perf top -G
  - enables frame pointer unwind

  $ perf record --call-graph=fp ls
  - enables frame pointer unwind

  $ perf top --call-graph=dwarf,4096 ls
  - enables dwarf unwind with user stack size dump 4096 bytes

thanks,
jirka

Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
---
Jiri Olsa (4):
      perf tools: Split -g and --call-graph for record command
      perf tools: Split -G and --call-graph for top command
      perf tools: Add call-graph option support into .perfconfig
      perf tools: Add readable output for callchain debug

 tools/perf/Documentation/perf-record.txt |  6 +++++-
 tools/perf/Documentation/perf-top.txt    | 18 +++++-------------
 tools/perf/builtin-record.c              | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
 tools/perf/builtin-top.c                 | 35 +++++++++++++++++++++++++----------
 tools/perf/perf.h                        |  4 +++-
 tools/perf/util/callchain.h              |  3 +++
 tools/perf/util/evsel.c                  |  2 +-
 7 files changed, 110 insertions(+), 47 deletions(-)

^ permalink raw reply	[flat|nested] 91+ messages in thread

* [PATCH 1/4] perf tools: Split -g and --call-graph for record command
  2013-10-26 14:25                       ` [PATCH 0/4] perf tools: Fix -g option handling Jiri Olsa
@ 2013-10-26 14:25                         ` Jiri Olsa
  2013-10-27 15:30                           ` David Ahern
                                             ` (2 more replies)
  2013-10-26 14:25                         ` [PATCH 2/4] perf tools: Split -G and --call-graph for top command Jiri Olsa
                                           ` (3 subsequent siblings)
  4 siblings, 3 replies; 91+ messages in thread
From: Jiri Olsa @ 2013-10-26 14:25 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jiri Olsa, Corey Ashford, David Ahern, Ingo Molnar, Namhyung Kim,
	Paul Mackerras, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Andi Kleen, Adrian Hunter

Splitting -g and --call-graph for record command, so we could
use '-g' with no option.

The '-g' option now takes NO argument and enables the configured
unwind method, which is currently the frame pointers method.

It will be possible to configure unwind method via config
file in upcoming patches.

All current '-g' arguments is overtaken by --call-graph option.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
---
 tools/perf/Documentation/perf-record.txt |  6 ++-
 tools/perf/builtin-record.c              | 71 ++++++++++++++++++++++----------
 tools/perf/util/callchain.h              |  3 ++
 3 files changed, 58 insertions(+), 22 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index f10ab63..7be62770 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -92,8 +92,12 @@ OPTIONS
 	size is rounded up to have nearest pages power of two value.
 
 -g::
+	Enables call-graph (stack chain/backtrace) recording.
+
 --call-graph::
-	Do call-graph (stack chain/backtrace) recording.
+	Setup and enable call-graph (stack chain/backtrace) recording,
+	implies -g.
+
 
 -q::
 --quiet::
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index ab8d15e..27622bb 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -678,21 +678,12 @@ static int get_stack_size(char *str, unsigned long *_size)
 }
 #endif /* HAVE_LIBUNWIND_SUPPORT */
 
-int record_parse_callchain_opt(const struct option *opt,
-			       const char *arg, int unset)
+int record_parse_callchain(const char *arg, struct perf_record_opts *opts)
 {
-	struct perf_record_opts *opts = opt->value;
 	char *tok, *name, *saveptr = NULL;
 	char *buf;
 	int ret = -1;
 
-	/* --no-call-graph */
-	if (unset)
-		return 0;
-
-	/* We specified default option if none is provided. */
-	BUG_ON(!arg);
-
 	/* We need buffer that we know we can write to. */
 	buf = malloc(strlen(arg) + 1);
 	if (!buf)
@@ -730,10 +721,6 @@ int record_parse_callchain_opt(const struct option *opt,
 				ret = get_stack_size(tok, &size);
 				opts->stack_dump_size = size;
 			}
-
-			if (!ret)
-				pr_debug("callchain: stack dump size %d\n",
-					 opts->stack_dump_size);
 #endif /* HAVE_LIBUNWIND_SUPPORT */
 		} else {
 			pr_err("callchain: Unknown -g option "
@@ -744,13 +731,52 @@ int record_parse_callchain_opt(const struct option *opt,
 	} while (0);
 
 	free(buf);
+	return ret;
+}
+
+static void callchain_debug(struct perf_record_opts *opts)
+{
+	pr_debug("callchain: type %d\n", opts->call_graph);
 
+	if (opts->call_graph == CALLCHAIN_DWARF)
+		pr_debug("callchain: stack dump size %d\n",
+			 opts->stack_dump_size);
+}
+
+int record_parse_callchain_opt(const struct option *opt,
+			       const char *arg,
+			       int unset)
+{
+	struct perf_record_opts *opts = opt->value;
+	int ret;
+
+	/* --no-call-graph */
+	if (unset) {
+		opts->call_graph = CALLCHAIN_NONE;
+		pr_debug("callchain: disabled\n");
+		return 0;
+	}
+
+	ret = record_parse_callchain(arg, opts);
 	if (!ret)
-		pr_debug("callchain: type %d\n", opts->call_graph);
+		callchain_debug(opts);
 
 	return ret;
 }
 
+int record_callchain_opt(const struct option *opt,
+			 const char *arg __maybe_unused,
+			 int unset __maybe_unused)
+{
+	struct perf_record_opts *opts = opt->value;
+
+	if (opts->call_graph == CALLCHAIN_NONE)
+		opts->call_graph = CALLCHAIN_FP;
+
+	callchain_debug(opts);
+	return 0;
+}
+
 static const char * const record_usage[] = {
 	"perf record [<options>] [<command>]",
 	"perf record [<options>] -- <command> [<options>]",
@@ -779,12 +805,12 @@ static struct perf_record record = {
 	},
 };
 
-#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "
+#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "
 
 #ifdef HAVE_LIBUNWIND_SUPPORT
-const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
+const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
 #else
-const char record_callchain_help[] = CALLCHAIN_HELP "[fp]";
+const char record_callchain_help[] = CALLCHAIN_HELP "fp";
 #endif
 
 /*
@@ -825,9 +851,12 @@ const struct option record_options[] = {
 		     perf_evlist__parse_mmap_pages),
 	OPT_BOOLEAN(0, "group", &record.opts.group,
 		    "put the counters into a counter group"),
-	OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
-			     "mode[,dump_size]", record_callchain_help,
-			     &record_parse_callchain_opt, "fp"),
+	OPT_CALLBACK(0, "call-graph", &record.opts,
+		     "mode[,dump_size]", record_callchain_help,
+		     &record_parse_callchain_opt),
+	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
+			   NULL, "enables call-graph recording" ,
+			   &record_callchain_opt),
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show counter open errors, etc)"),
 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 7bb3602..4f7f989 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -146,6 +146,9 @@ static inline void callchain_cursor_advance(struct callchain_cursor *cursor)
 
 struct option;
 
+int record_parse_callchain(const char *arg, struct perf_record_opts *opts);
 int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset);
+int record_callchain_opt(const struct option *opt, const char *arg, int unset);
+
 extern const char record_callchain_help[];
 #endif	/* __PERF_CALLCHAIN_H */
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [PATCH 2/4] perf tools: Split -G and --call-graph for top command
  2013-10-26 14:25                       ` [PATCH 0/4] perf tools: Fix -g option handling Jiri Olsa
  2013-10-26 14:25                         ` [PATCH 1/4] perf tools: Split -g and --call-graph for record command Jiri Olsa
@ 2013-10-26 14:25                         ` Jiri Olsa
  2013-10-27 15:34                           ` David Ahern
  2013-10-29  8:22                           ` [tip:perf/urgent] perf top: Split -G and --call-graph tip-bot for Jiri Olsa
  2013-10-26 14:25                         ` [PATCH 3/4] perf tools: Add call-graph option support into .perfconfig Jiri Olsa
                                           ` (2 subsequent siblings)
  4 siblings, 2 replies; 91+ messages in thread
From: Jiri Olsa @ 2013-10-26 14:25 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jiri Olsa, Corey Ashford, David Ahern, Ingo Molnar, Namhyung Kim,
	Paul Mackerras, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Andi Kleen, Adrian Hunter

Splitting -G and --call-graph for record command, so we could
use '-G' with no option.

The '-G' option now takes NO argument and enables the configured
unwind method, which is currently the frame pointers method.

It will be possible to configure unwind method via config
file in upcoming patches.

All current '-G' arguments is overtaken by --call-graph option.

NOTE The documentation for top --call-graph option
     was wrongly copied from report command.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
---
 tools/perf/Documentation/perf-top.txt | 18 +++++-------------
 tools/perf/builtin-top.c              | 23 +++++++++++++----------
 2 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index c16a09e..d311974 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -143,20 +143,12 @@ Default is to monitor all CPUS.
 --asm-raw::
 	Show raw instruction encoding of assembly instructions.
 
--G [type,min,order]::
+-G::
+	Enables call-graph (stack chain/backtrace) recording.
+
 --call-graph::
-        Display call chains using type, min percent threshold and order.
-	type can be either:
-	- flat: single column, linear exposure of call chains.
-	- graph: use a graph tree, displaying absolute overhead rates.
-	- fractal: like graph, but displays relative rates. Each branch of
-		 the tree is considered as a new profiled object.
-
-	order can be either:
-	- callee: callee based call graph.
-	- caller: inverted caller based call graph.
-
-	Default: fractal,0.5,callee.
+	Setup and enable call-graph (stack chain/backtrace) recording,
+	implies -g.
 
 --max-stack::
 	Set the stack depth limit when parsing the callchain, anything
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 04f5bf2..488fec3 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1015,16 +1015,16 @@ out_delete:
 }
 
 static int
-parse_callchain_opt(const struct option *opt, const char *arg, int unset)
+callchain_opt(const struct option *opt, const char *arg, int unset)
 {
-	/*
-	 * --no-call-graph
-	 */
-	if (unset)
-		return 0;
-
 	symbol_conf.use_callchain = true;
+	return record_callchain_opt(opt, arg, unset);
+}
 
+static int
+parse_callchain_opt(const struct option *opt, const char *arg, int unset)
+{
+	symbol_conf.use_callchain = true;
 	return record_parse_callchain_opt(opt, arg, unset);
 }
 
@@ -1110,9 +1110,12 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 		   " abort, in_tx, transaction"),
 	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
 		    "Show a column with the number of samples"),
-	OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
-			     "mode[,dump_size]", record_callchain_help,
-			     &parse_callchain_opt, "fp"),
+	OPT_CALLBACK(0, "call-graph", &top.record_opts,
+		     "mode[,dump_size]", record_callchain_help,
+		     &parse_callchain_opt),
+	OPT_CALLBACK_NOOPT('G', NULL, &top.record_opts,
+			     NULL, "enables call-graph recording",
+			     &callchain_opt),
 	OPT_INTEGER(0, "max-stack", &top.max_stack,
 		    "Set the maximum stack depth when parsing the callchain. "
 		    "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [PATCH 3/4] perf tools: Add call-graph option support into .perfconfig
  2013-10-26 14:25                       ` [PATCH 0/4] perf tools: Fix -g option handling Jiri Olsa
  2013-10-26 14:25                         ` [PATCH 1/4] perf tools: Split -g and --call-graph for record command Jiri Olsa
  2013-10-26 14:25                         ` [PATCH 2/4] perf tools: Split -G and --call-graph for top command Jiri Olsa
@ 2013-10-26 14:25                         ` Jiri Olsa
  2013-10-27 15:36                           ` David Ahern
  2013-10-28  8:10                           ` Namhyung Kim
  2013-10-26 14:25                         ` [PATCH 4/4] perf tools: Add readable output for callchain debug Jiri Olsa
  2013-10-26 14:32                         ` [PATCH 0/4] perf tools: Fix -g option handling Jiri Olsa
  4 siblings, 2 replies; 91+ messages in thread
From: Jiri Olsa @ 2013-10-26 14:25 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jiri Olsa, Corey Ashford, David Ahern, Ingo Molnar, Namhyung Kim,
	Paul Mackerras, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Andi Kleen, Adrian Hunter

Adding call-graph option support into .perfconfig file,
so it's now possible use call-graph option like:

  [top]
        call-graph = fp

  [record]
        call-graph = dwarf,8192

Above options ONLY setup the unwind method. To enable
perf record/top to actually use it the command line
option -g/-G must be specified.

The --call-graph option overloads .perfconfig setup.

Assuming above configuration:

  $ perf record -g ls
  - enables dwarf unwind with user stack size dump 8192 bytes

  $ perf top -G
  - enables frame pointer unwind

  $ perf record --call-graph=fp ls
  - enables frame pointer unwind

  $ perf top --call-graph=dwarf,4096 ls
  - enables dwarf unwind with user stack size dump 4096 bytes

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
---
 tools/perf/builtin-record.c | 16 ++++++++++++++++
 tools/perf/builtin-top.c    | 12 ++++++++++++
 tools/perf/perf.h           |  1 +
 tools/perf/util/evsel.c     |  2 +-
 4 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 27622bb..f597b9b 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -750,6 +750,8 @@ int record_parse_callchain_opt(const struct option *opt,
 	struct perf_record_opts *opts = opt->value;
 	int ret;
 
+	opts->call_graph_enabled = !unset;
+
 	/* --no-call-graph */
 	if (unset) {
 		opts->call_graph = CALLCHAIN_NONE;
@@ -770,6 +772,8 @@ int record_callchain_opt(const struct option *opt,
 {
 	struct perf_record_opts *opts = opt->value;
 
+	opts->call_graph_enabled = !unset;
+
 	if (opts->call_graph == CALLCHAIN_NONE)
 		opts->call_graph = CALLCHAIN_FP;
 
@@ -777,6 +781,16 @@ int record_callchain_opt(const struct option *opt,
 	return 0;
 }
 
+static int perf_record_config(const char *var, const char *value, void *cb)
+{
+	struct perf_record *rec = cb;
+
+	if (!strcmp(var, "record.call-graph"))
+		return record_parse_callchain(value, &rec->opts);
+
+	return perf_default_config(var, value, cb);
+}
+
 static const char * const record_usage[] = {
 	"perf record [<options>] [<command>]",
 	"perf record [<options>] -- <command> [<options>]",
@@ -905,6 +919,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 
 	rec->evlist = evsel_list;
 
+	perf_config(perf_record_config, rec);
+
 	argc = parse_options(argc, argv, record_options, record_usage,
 			    PARSE_OPT_STOP_AT_NON_OPTION);
 	if (!argc && perf_target__none(&rec->opts.target))
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 488fec3..b326f61 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1028,6 +1028,16 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 	return record_parse_callchain_opt(opt, arg, unset);
 }
 
+static int perf_top_config(const char *var, const char *value, void *cb)
+{
+	struct perf_top *top = cb;
+
+	if (!strcmp(var, "top.call-graph"))
+		return record_parse_callchain(value, &top->record_opts);
+
+	return perf_default_config(var, value, cb);
+}
+
 static int
 parse_percent_limit(const struct option *opt, const char *arg,
 		    int unset __maybe_unused)
@@ -1152,6 +1162,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (top.evlist == NULL)
 		return -ENOMEM;
 
+	perf_config(perf_top_config, &top);
+
 	argc = parse_options(argc, argv, options, top_usage, 0);
 	if (argc)
 		usage_with_options(top_usage, options);
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index f61c230..c4ed633 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -215,6 +215,7 @@ enum perf_call_graph_mode {
 struct perf_record_opts {
 	struct perf_target target;
 	int	     call_graph;
+	bool         call_graph_enabled;
 	bool	     group;
 	bool	     inherit_stat;
 	bool	     no_delay;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ec0cc1e..f1ed2ad 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -633,7 +633,7 @@ void perf_evsel__config(struct perf_evsel *evsel,
 		attr->mmap_data = track;
 	}
 
-	if (opts->call_graph) {
+	if (opts->call_graph_enabled) {
 		perf_evsel__set_sample_bit(evsel, CALLCHAIN);
 
 		if (opts->call_graph == CALLCHAIN_DWARF) {
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [PATCH 4/4] perf tools: Add readable output for callchain debug
  2013-10-26 14:25                       ` [PATCH 0/4] perf tools: Fix -g option handling Jiri Olsa
                                           ` (2 preceding siblings ...)
  2013-10-26 14:25                         ` [PATCH 3/4] perf tools: Add call-graph option support into .perfconfig Jiri Olsa
@ 2013-10-26 14:25                         ` Jiri Olsa
  2013-10-27 15:39                           ` David Ahern
  2013-10-26 14:32                         ` [PATCH 0/4] perf tools: Fix -g option handling Jiri Olsa
  4 siblings, 1 reply; 91+ messages in thread
From: Jiri Olsa @ 2013-10-26 14:25 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jiri Olsa, Corey Ashford, David Ahern, Ingo Molnar, Namhyung Kim,
	Paul Mackerras, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Andi Kleen, Adrian Hunter

Adding people readable output for callchain debug,
to get following '-v' output:

  $ perf record -v -g ls
  callchain: type DWARF
  callchain: stack dump size 4096
  ...

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
---
 tools/perf/builtin-record.c | 4 +++-
 tools/perf/perf.h           | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index f597b9b..8bdd608 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -736,7 +736,9 @@ int record_parse_callchain(const char *arg, struct perf_record_opts *opts)
 
 static void callchain_debug(struct perf_record_opts *opts)
 {
-	pr_debug("callchain: type %d\n", opts->call_graph);
+	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" };
+
+	pr_debug("callchain: type %s\n", str[opts->call_graph]);
 
 	if (opts->call_graph == CALLCHAIN_DWARF)
 		pr_debug("callchain: stack dump size %d\n",
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index c4ed633..e84aeeb 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -209,7 +209,8 @@ void pthread__unblock_sigwinch(void);
 enum perf_call_graph_mode {
 	CALLCHAIN_NONE,
 	CALLCHAIN_FP,
-	CALLCHAIN_DWARF
+	CALLCHAIN_DWARF,
+	CALLCHAIN_MAX
 };
 
 struct perf_record_opts {
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 91+ messages in thread

* Re: [PATCH 0/4] perf tools: Fix -g option handling
  2013-10-26 14:25                       ` [PATCH 0/4] perf tools: Fix -g option handling Jiri Olsa
                                           ` (3 preceding siblings ...)
  2013-10-26 14:25                         ` [PATCH 4/4] perf tools: Add readable output for callchain debug Jiri Olsa
@ 2013-10-26 14:32                         ` Jiri Olsa
  2013-10-27  6:56                           ` Ingo Molnar
  4 siblings, 1 reply; 91+ messages in thread
From: Jiri Olsa @ 2013-10-26 14:32 UTC (permalink / raw)
  To: linux-kernel
  Cc: Corey Ashford, David Ahern, Ingo Molnar, Namhyung Kim,
	Paul Mackerras, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Andi Kleen, Adrian Hunter

On Sat, Oct 26, 2013 at 04:25:32PM +0200, Jiri Olsa wrote:
> hi,
> changing the '-g/-G' options for record/top commands
> to take NO argument and enable unwind method based
> on .perfconfig setup (using FP by default).
> 
> The current -g option parsing moves into the
> '--call-graph' option.

forgot to mention branch:

  git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git
  perf/cc

jirka

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 0/4] perf tools: Fix -g option handling
  2013-10-26 14:32                         ` [PATCH 0/4] perf tools: Fix -g option handling Jiri Olsa
@ 2013-10-27  6:56                           ` Ingo Molnar
  2013-10-29 10:21                             ` Jiri Olsa
  0 siblings, 1 reply; 91+ messages in thread
From: Ingo Molnar @ 2013-10-27  6:56 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: linux-kernel, Corey Ashford, David Ahern, Ingo Molnar,
	Namhyung Kim, Paul Mackerras, Peter Zijlstra,
	Arnaldo Carvalho de Melo, Andi Kleen, Adrian Hunter


* Jiri Olsa <jolsa@redhat.com> wrote:

> On Sat, Oct 26, 2013 at 04:25:32PM +0200, Jiri Olsa wrote:
> > hi,
> > changing the '-g/-G' options for record/top commands
> > to take NO argument and enable unwind method based
> > on .perfconfig setup (using FP by default).
> > 
> > The current -g option parsing moves into the
> > '--call-graph' option.
> 
> forgot to mention branch:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git
>   perf/cc

I tested it - works perfectly now for me:

Acked-and-tested-by: Ingo Molnar <mingo@kernel.org>

You might want to give --call-graph a single letter option name as well, 
-g, -G, -c, -C are all taken already, but both -l and -L are still 
available - maybe use -L for it?

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 1/4] perf tools: Split -g and --call-graph for record command
  2013-10-26 14:25                         ` [PATCH 1/4] perf tools: Split -g and --call-graph for record command Jiri Olsa
@ 2013-10-27 15:30                           ` David Ahern
  2013-10-28 17:46                             ` Arnaldo Carvalho de Melo
  2013-10-28  7:59                           ` Namhyung Kim
  2013-10-29  8:22                           ` [tip:perf/urgent] perf record: Split -g and --call-graph tip-bot for Jiri Olsa
  2 siblings, 1 reply; 91+ messages in thread
From: David Ahern @ 2013-10-27 15:30 UTC (permalink / raw)
  To: Jiri Olsa, linux-kernel
  Cc: Corey Ashford, Ingo Molnar, Namhyung Kim, Paul Mackerras,
	Peter Zijlstra, Arnaldo Carvalho de Melo, Andi Kleen,
	Adrian Hunter

On 10/26/13 8:25 AM, Jiri Olsa wrote:
> diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
> index f10ab63..7be62770 100644
> --- a/tools/perf/Documentation/perf-record.txt
> +++ b/tools/perf/Documentation/perf-record.txt
> @@ -92,8 +92,12 @@ OPTIONS
>   	size is rounded up to have nearest pages power of two value.
>
>   -g::
> +	Enables call-graph (stack chain/backtrace) recording.
> +
>   --call-graph::
> -	Do call-graph (stack chain/backtrace) recording.
> +	Setup and enable call-graph (stack chain/backtrace) recording,
> +	implies -g.
> +

This needs some more words as to why it is used over -g. It is also 
missing the options that can be given (fp or dwarf).

---8<---

> @@ -825,9 +851,12 @@ const struct option record_options[] = {
>   		     perf_evlist__parse_mmap_pages),
>   	OPT_BOOLEAN(0, "group", &record.opts.group,
>   		    "put the counters into a counter group"),
> -	OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
> -			     "mode[,dump_size]", record_callchain_help,
> -			     &record_parse_callchain_opt, "fp"),
> +	OPT_CALLBACK(0, "call-graph", &record.opts,
> +		     "mode[,dump_size]", record_callchain_help,
> +		     &record_parse_callchain_opt),
> +	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
> +			   NULL, "enables call-graph recording" ,
> +			   &record_callchain_opt),

 From a user/readability perspective I would prefer the order to be -g 
then --call-graph especially since --call-graph is like an advanced -g 
where you get more control over the method used.

Other than that:

Tested-Reviewed-by: David Ahern <dsahern@gmail.com>

David


^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 2/4] perf tools: Split -G and --call-graph for top command
  2013-10-26 14:25                         ` [PATCH 2/4] perf tools: Split -G and --call-graph for top command Jiri Olsa
@ 2013-10-27 15:34                           ` David Ahern
  2013-10-28  8:06                             ` Namhyung Kim
  2013-10-29  8:22                           ` [tip:perf/urgent] perf top: Split -G and --call-graph tip-bot for Jiri Olsa
  1 sibling, 1 reply; 91+ messages in thread
From: David Ahern @ 2013-10-27 15:34 UTC (permalink / raw)
  To: Jiri Olsa, linux-kernel
  Cc: Corey Ashford, Ingo Molnar, Namhyung Kim, Paul Mackerras,
	Peter Zijlstra, Arnaldo Carvalho de Melo, Andi Kleen,
	Adrian Hunter

On 10/26/13 8:25 AM, Jiri Olsa wrote:
> diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
> index c16a09e..d311974 100644
> --- a/tools/perf/Documentation/perf-top.txt
> +++ b/tools/perf/Documentation/perf-top.txt
> @@ -143,20 +143,12 @@ Default is to monitor all CPUS.
>   --asm-raw::
>   	Show raw instruction encoding of assembly instructions.
>
> --G [type,min,order]::
> +-G::
> +	Enables call-graph (stack chain/backtrace) recording.
> +
>   --call-graph::
> -        Display call chains using type, min percent threshold and order.
> -	type can be either:
> -	- flat: single column, linear exposure of call chains.
> -	- graph: use a graph tree, displaying absolute overhead rates.
> -	- fractal: like graph, but displays relative rates. Each branch of
> -		 the tree is considered as a new profiled object.
> -
> -	order can be either:
> -	- callee: callee based call graph.
> -	- caller: inverted caller based call graph.
> -
> -	Default: fractal,0.5,callee.
> +	Setup and enable call-graph (stack chain/backtrace) recording,
> +	implies -g.

implies '-G' for perf-top. Given that perf-top is recording and 
analyzing events shouldn't the analysis options (caller/callee and 
callgraph type) be kept here?

Really should have callchains be consistent across perf-commands as -g.

Other than that: Tested-Reviewed-by: David Ahern <dsahern@gmail.com>

David

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 3/4] perf tools: Add call-graph option support into .perfconfig
  2013-10-26 14:25                         ` [PATCH 3/4] perf tools: Add call-graph option support into .perfconfig Jiri Olsa
@ 2013-10-27 15:36                           ` David Ahern
  2013-10-28  8:10                           ` Namhyung Kim
  1 sibling, 0 replies; 91+ messages in thread
From: David Ahern @ 2013-10-27 15:36 UTC (permalink / raw)
  To: Jiri Olsa, linux-kernel
  Cc: Corey Ashford, Ingo Molnar, Namhyung Kim, Paul Mackerras,
	Peter Zijlstra, Arnaldo Carvalho de Melo, Andi Kleen,
	Adrian Hunter

On 10/26/13 8:25 AM, Jiri Olsa wrote:
> Adding call-graph option support into .perfconfig file,
> so it's now possible use call-graph option like:
>
>    [top]
>          call-graph = fp
>
>    [record]
>          call-graph = dwarf,8192
>
> Above options ONLY setup the unwind method. To enable
> perf record/top to actually use it the command line
> option -g/-G must be specified.
>
> The --call-graph option overloads .perfconfig setup.

Documentation for perf-top and perf-record should be updated with the above.

Otherwise, Tested-Reviewed-by: David Ahern <dsahern@gmail.com>

David


^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 4/4] perf tools: Add readable output for callchain debug
  2013-10-26 14:25                         ` [PATCH 4/4] perf tools: Add readable output for callchain debug Jiri Olsa
@ 2013-10-27 15:39                           ` David Ahern
  0 siblings, 0 replies; 91+ messages in thread
From: David Ahern @ 2013-10-27 15:39 UTC (permalink / raw)
  To: Jiri Olsa, linux-kernel
  Cc: Corey Ashford, Ingo Molnar, Namhyung Kim, Paul Mackerras,
	Peter Zijlstra, Arnaldo Carvalho de Melo, Andi Kleen,
	Adrian Hunter

On 10/26/13 8:25 AM, Jiri Olsa wrote:
> Adding people readable output for callchain debug,
> to get following '-v' output:
>
>    $ perf record -v -g ls
>    callchain: type DWARF
>    callchain: stack dump size 4096
>    ...

Kind of ugly that the -v has to be first to actually get the debug 
output. Other debug options have the same problem though. So,

Tested-Reviewed-by: David Ahern <dsahern@gmail.com>


^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 6/8] perf tools: Add new comm infrastructure
  2013-10-25 18:19           ` David Ahern
@ 2013-10-28  5:38             ` Namhyung Kim
  2013-10-28  9:09               ` Frederic Weisbecker
  0 siblings, 1 reply; 91+ messages in thread
From: Namhyung Kim @ 2013-10-28  5:38 UTC (permalink / raw)
  To: David Ahern
  Cc: Frederic Weisbecker, Arnaldo Carvalho de Melo, Peter Zijlstra,
	Paul Mackerras, Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds,
	Jiri Olsa, Ingo Molnar, Stephane Eranian

Hi David,

On Fri, 25 Oct 2013 12:19:07 -0600, David Ahern wrote:
> On 10/25/13 12:12 PM, Frederic Weisbecker wrote:
>> Oh I see. It's possible that my massive conversion to use the comm
>> accessor got blind at some point and left over a few things. I
>> remember that I only lightly tested that new comm infrastructure. I
>> mean I tested a lot of "perf report -s foo,bar" combinations for
>> performance comparisons but I haven't tested the perf script and all
>> the other perf tools.
>>
>> I'll rebase these patches and test that wider before resending.
>
> specifically, I see stuff like perf forking ls and comm still shows as
> perf even though there is COMM record with the rename to ls. I believe
> the test case was something like:
>
> perf sched record -- ls
> perf script

Hmm.. did you try my latest v5 patchset?  I couldn't reproduce the case
at least for the command lines above.

  $ perf script
            perf 24810 [007] 1546517.815809: sched:sched_stat_runtime: comm=perf pid=24810 ru
            perf 24810 [007] 1546517.815909: sched:sched_wakeup: comm=perf pid=24811 prio=120
         swapper     0 [008] 1546517.815913: sched:sched_switch: prev_comm=swapper/8 prev_pid
            perf 24810 [007] 1546517.815953: sched:sched_stat_runtime: comm=perf pid=24810 ru
            perf 24810 [007] 1546517.815957: sched:sched_switch: prev_comm=perf prev_pid=2481
            perf 24811 [008] 1546517.815992: sched:sched_wakeup: comm=migration/8 pid=48 prio
            perf 24811 [008] 1546517.815993: sched:sched_stat_runtime: comm=perf pid=24811 ru
            perf 24811 [008] 1546517.815993: sched:sched_switch: prev_comm=perf prev_pid=2481
     migration/8    48 [008] 1546517.815996: sched:sched_migrate_task: comm=perf pid=24811 pr
     migration/8    48 [008] 1546517.816000: sched:sched_switch: prev_comm=migration/8 prev_p
         swapper     0 [009] 1546517.816002: sched:sched_switch: prev_comm=swapper/9 prev_pid
              ls 24811 [009] 1546517.816808: sched:sched_stat_runtime: comm=ls pid=24811 runt


Here, the process 24811 has only 3 samples before COMM event

  $ perf report -D | grep 24811 | grep -v MMAP | head
  0 0 0x629b0 [0x38]: PERF_RECORD_COMM: perf:24811
  8 1546517815992058 0x65f50 [0x68]: PERF_RECORD_SAMPLE(IP, 1): 24811/24811: 0xffffffff81091512 period: 1 addr: 0
   ... thread: perf:24811
  8 1546517815993189 0x65fb8 [0x70]: PERF_RECORD_SAMPLE(IP, 1): 24811/24811: 0xffffffff81099d25 period: 83314 addr: 0
   ... thread: perf:24811
  8 1546517815993975 0x66028 [0x80]: PERF_RECORD_SAMPLE(IP, 1): 24811/24811: 0xffffffff81659d60 period: 1 addr: 0
   ... thread: perf:24811
  9 1546517816224342 0x66378 [0x38]: PERF_RECORD_COMM: ls:24811
  9 1546517816808637 0x667f0 [0x70]: PERF_RECORD_SAMPLE(IP, 1): 24811/24811: 0xffffffff81099d25 period: 810663 addr: 0
   ... thread: ls:24811

  $ perf version
  perf version 3.11.ge9eb20


Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 1/4] perf tools: Split -g and --call-graph for record command
  2013-10-26 14:25                         ` [PATCH 1/4] perf tools: Split -g and --call-graph for record command Jiri Olsa
  2013-10-27 15:30                           ` David Ahern
@ 2013-10-28  7:59                           ` Namhyung Kim
  2013-10-29 10:18                             ` Jiri Olsa
  2013-10-29  8:22                           ` [tip:perf/urgent] perf record: Split -g and --call-graph tip-bot for Jiri Olsa
  2 siblings, 1 reply; 91+ messages in thread
From: Namhyung Kim @ 2013-10-28  7:59 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: linux-kernel, Corey Ashford, David Ahern, Ingo Molnar,
	Paul Mackerras, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Andi Kleen, Adrian Hunter

Hi Jiri,

On Sat, 26 Oct 2013 16:25:33 +0200, Jiri Olsa wrote:
> Splitting -g and --call-graph for record command, so we could
> use '-g' with no option.
>
> The '-g' option now takes NO argument and enables the configured
> unwind method, which is currently the frame pointers method.
>
> It will be possible to configure unwind method via config
> file in upcoming patches.
>
> All current '-g' arguments is overtaken by --call-graph option.

  $ perf record --call-graph none -- true
  callchain: Unknown -g option value: none

It should be

  callchain: Unknown --call-graph option value: none


Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 2/4] perf tools: Split -G and --call-graph for top command
  2013-10-27 15:34                           ` David Ahern
@ 2013-10-28  8:06                             ` Namhyung Kim
  0 siblings, 0 replies; 91+ messages in thread
From: Namhyung Kim @ 2013-10-28  8:06 UTC (permalink / raw)
  To: David Ahern
  Cc: Jiri Olsa, linux-kernel, Corey Ashford, Ingo Molnar,
	Paul Mackerras, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Andi Kleen, Adrian Hunter

On Sun, 27 Oct 2013 09:34:44 -0600, David Ahern wrote:
> On 10/26/13 8:25 AM, Jiri Olsa wrote:
>> diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
>> index c16a09e..d311974 100644
>> --- a/tools/perf/Documentation/perf-top.txt
>> +++ b/tools/perf/Documentation/perf-top.txt
>> @@ -143,20 +143,12 @@ Default is to monitor all CPUS.
>>   --asm-raw::
>>   	Show raw instruction encoding of assembly instructions.
>>
>> --G [type,min,order]::
>> +-G::
>> +	Enables call-graph (stack chain/backtrace) recording.
>> +
>>   --call-graph::
>> -        Display call chains using type, min percent threshold and order.
>> -	type can be either:
>> -	- flat: single column, linear exposure of call chains.
>> -	- graph: use a graph tree, displaying absolute overhead rates.
>> -	- fractal: like graph, but displays relative rates. Each branch of
>> -		 the tree is considered as a new profiled object.
>> -
>> -	order can be either:
>> -	- callee: callee based call graph.
>> -	- caller: inverted caller based call graph.
>> -
>> -	Default: fractal,0.5,callee.
>> +	Setup and enable call-graph (stack chain/backtrace) recording,
>> +	implies -g.
>
> implies '-G' for perf-top. Given that perf-top is recording and
> analyzing events shouldn't the analysis options (caller/callee and
> callgraph type) be kept here?

Yes it should, but it seems it only lived in the documentation - the -G
option was already for fp or dwarf and no analyzing options.  Maybe we
might add --call-graph-style or something.

>
> Really should have callchains be consistent across perf-commands as -g.

Agreed.  As -g/--group option in perf top is rarely used, we maybe
disable -g option for a while and then add it as a shortcut to
--call-graph later.

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 3/4] perf tools: Add call-graph option support into .perfconfig
  2013-10-26 14:25                         ` [PATCH 3/4] perf tools: Add call-graph option support into .perfconfig Jiri Olsa
  2013-10-27 15:36                           ` David Ahern
@ 2013-10-28  8:10                           ` Namhyung Kim
  2013-10-29 10:18                             ` Jiri Olsa
  1 sibling, 1 reply; 91+ messages in thread
From: Namhyung Kim @ 2013-10-28  8:10 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: linux-kernel, Corey Ashford, David Ahern, Ingo Molnar,
	Paul Mackerras, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Andi Kleen, Adrian Hunter

On Sat, 26 Oct 2013 16:25:35 +0200, Jiri Olsa wrote:
> Adding call-graph option support into .perfconfig file,
> so it's now possible use call-graph option like:
>
>   [top]
>         call-graph = fp
>
>   [record]
>         call-graph = dwarf,8192
>
> Above options ONLY setup the unwind method. To enable
> perf record/top to actually use it the command line
> option -g/-G must be specified.
>
> The --call-graph option overloads .perfconfig setup.
>
> Assuming above configuration:
>
>   $ perf record -g ls
>   - enables dwarf unwind with user stack size dump 8192 bytes
>
>   $ perf top -G
>   - enables frame pointer unwind
>
>   $ perf record --call-graph=fp ls
>   - enables frame pointer unwind
>
>   $ perf top --call-graph=dwarf,4096 ls
>   - enables dwarf unwind with user stack size dump 4096 bytes
>
[SNIP]
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -750,6 +750,8 @@ int record_parse_callchain_opt(const struct option *opt,
>  	struct perf_record_opts *opts = opt->value;
>  	int ret;
>  
> +	opts->call_graph_enabled = !unset;
> +

Why not just using symbol_conf.use_callchain?

Thanks,
Namhyung


>  	/* --no-call-graph */
>  	if (unset) {
>  		opts->call_graph = CALLCHAIN_NONE;
> @@ -770,6 +772,8 @@ int record_callchain_opt(const struct option *opt,
>  {
>  	struct perf_record_opts *opts = opt->value;
>  
> +	opts->call_graph_enabled = !unset;
> +
>  	if (opts->call_graph == CALLCHAIN_NONE)
>  		opts->call_graph = CALLCHAIN_FP;
>  
> @@ -777,6 +781,16 @@ int record_callchain_opt(const struct option *opt,
>  	return 0;
>  }

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 6/8] perf tools: Add new comm infrastructure
  2013-10-28  5:38             ` Namhyung Kim
@ 2013-10-28  9:09               ` Frederic Weisbecker
  2013-10-28  9:15                 ` Namhyung Kim
  0 siblings, 1 reply; 91+ messages in thread
From: Frederic Weisbecker @ 2013-10-28  9:09 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: David Ahern, Arnaldo Carvalho de Melo, Peter Zijlstra,
	Paul Mackerras, Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds,
	Jiri Olsa, Ingo Molnar, Stephane Eranian

2013/10/28 Namhyung Kim <namhyung@kernel.org>:
> Hi David,
>
> On Fri, 25 Oct 2013 12:19:07 -0600, David Ahern wrote:
>> On 10/25/13 12:12 PM, Frederic Weisbecker wrote:
>>> Oh I see. It's possible that my massive conversion to use the comm
>>> accessor got blind at some point and left over a few things. I
>>> remember that I only lightly tested that new comm infrastructure. I
>>> mean I tested a lot of "perf report -s foo,bar" combinations for
>>> performance comparisons but I haven't tested the perf script and all
>>> the other perf tools.
>>>
>>> I'll rebase these patches and test that wider before resending.
>>
>> specifically, I see stuff like perf forking ls and comm still shows as
>> perf even though there is COMM record with the rename to ls. I believe
>> the test case was something like:
>>
>> perf sched record -- ls
>> perf script
>
> Hmm.. did you try my latest v5 patchset?  I couldn't reproduce the case
> at least for the command lines above.
>
>   $ perf script
>             perf 24810 [007] 1546517.815809: sched:sched_stat_runtime: comm=perf pid=24810 ru
>             perf 24810 [007] 1546517.815909: sched:sched_wakeup: comm=perf pid=24811 prio=120
>          swapper     0 [008] 1546517.815913: sched:sched_switch: prev_comm=swapper/8 prev_pid
>             perf 24810 [007] 1546517.815953: sched:sched_stat_runtime: comm=perf pid=24810 ru
>             perf 24810 [007] 1546517.815957: sched:sched_switch: prev_comm=perf prev_pid=2481
>             perf 24811 [008] 1546517.815992: sched:sched_wakeup: comm=migration/8 pid=48 prio
>             perf 24811 [008] 1546517.815993: sched:sched_stat_runtime: comm=perf pid=24811 ru
>             perf 24811 [008] 1546517.815993: sched:sched_switch: prev_comm=perf prev_pid=2481
>      migration/8    48 [008] 1546517.815996: sched:sched_migrate_task: comm=perf pid=24811 pr
>      migration/8    48 [008] 1546517.816000: sched:sched_switch: prev_comm=migration/8 prev_p
>          swapper     0 [009] 1546517.816002: sched:sched_switch: prev_comm=swapper/9 prev_pid
>               ls 24811 [009] 1546517.816808: sched:sched_stat_runtime: comm=ls pid=24811 runt
>
>
> Here, the process 24811 has only 3 samples before COMM event
>
>   $ perf report -D | grep 24811 | grep -v MMAP | head
>   0 0 0x629b0 [0x38]: PERF_RECORD_COMM: perf:24811
>   8 1546517815992058 0x65f50 [0x68]: PERF_RECORD_SAMPLE(IP, 1): 24811/24811: 0xffffffff81091512 period: 1 addr: 0
>    ... thread: perf:24811
>   8 1546517815993189 0x65fb8 [0x70]: PERF_RECORD_SAMPLE(IP, 1): 24811/24811: 0xffffffff81099d25 period: 83314 addr: 0
>    ... thread: perf:24811
>   8 1546517815993975 0x66028 [0x80]: PERF_RECORD_SAMPLE(IP, 1): 24811/24811: 0xffffffff81659d60 period: 1 addr: 0
>    ... thread: perf:24811
>   9 1546517816224342 0x66378 [0x38]: PERF_RECORD_COMM: ls:24811
>   9 1546517816808637 0x667f0 [0x70]: PERF_RECORD_SAMPLE(IP, 1): 24811/24811: 0xffffffff81099d25 period: 810663 addr: 0
>    ... thread: ls:24811
>
>   $ perf version
>   perf version 3.11.ge9eb20
>

Ah cool! Could you please remind me the name of that branch so that I
can do some tests and work on top of it?

Thanks.

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 6/8] perf tools: Add new comm infrastructure
  2013-10-28  9:09               ` Frederic Weisbecker
@ 2013-10-28  9:15                 ` Namhyung Kim
  2013-10-28 10:12                   ` Frederic Weisbecker
  0 siblings, 1 reply; 91+ messages in thread
From: Namhyung Kim @ 2013-10-28  9:15 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: David Ahern, Arnaldo Carvalho de Melo, Peter Zijlstra,
	Paul Mackerras, Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds,
	Jiri Olsa, Ingo Molnar, Stephane Eranian

Hi Frederic,

On Mon, 28 Oct 2013 10:09:46 +0100, Frederic Weisbecker wrote:
> Ah cool! Could you please remind me the name of that branch so that I
> can do some tests and work on top of it?

  git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git
    perf/callchain-v5

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 6/8] perf tools: Add new comm infrastructure
  2013-10-28  9:15                 ` Namhyung Kim
@ 2013-10-28 10:12                   ` Frederic Weisbecker
  2013-10-28 12:43                     ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 91+ messages in thread
From: Frederic Weisbecker @ 2013-10-28 10:12 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: David Ahern, Arnaldo Carvalho de Melo, Peter Zijlstra,
	Paul Mackerras, Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds,
	Jiri Olsa, Ingo Molnar, Stephane Eranian

On Mon, Oct 28, 2013 at 06:15:26PM +0900, Namhyung Kim wrote:
> Hi Frederic,
> 
> On Mon, 28 Oct 2013 10:09:46 +0100, Frederic Weisbecker wrote:
> > Ah cool! Could you please remind me the name of that branch so that I
> > can do some tests and work on top of it?
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git
>     perf/callchain-v5

Hmm, I tried to rebase on top of tip:perf/core and git is getting confused I guess with
which is applied and which is out of tree. There have been a lot of changes since
then and thus quite some conflicts.

If you don't mind, I would love if you rebase against latest tip:perf/core as you
know better than me what has been applied and what hasn't.

Thanks!

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 6/8] perf tools: Add new comm infrastructure
  2013-10-28 10:12                   ` Frederic Weisbecker
@ 2013-10-28 12:43                     ` Arnaldo Carvalho de Melo
  2013-10-28 14:29                       ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 91+ messages in thread
From: Arnaldo Carvalho de Melo @ 2013-10-28 12:43 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Namhyung Kim, David Ahern, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds, Jiri Olsa,
	Ingo Molnar, Stephane Eranian

Em Mon, Oct 28, 2013 at 11:12:50AM +0100, Frederic Weisbecker escreveu:
> On Mon, Oct 28, 2013 at 06:15:26PM +0900, Namhyung Kim wrote:
> > Hi Frederic,
> > 
> > On Mon, 28 Oct 2013 10:09:46 +0100, Frederic Weisbecker wrote:
> > > Ah cool! Could you please remind me the name of that branch so that I
> > > can do some tests and work on top of it?
> > 
> >   git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git
> >     perf/callchain-v5
> 
> Hmm, I tried to rebase on top of tip:perf/core and git is getting confused I guess with
> which is applied and which is out of tree. There have been a lot of changes since
> then and thus quite some conflicts.
> 
> If you don't mind, I would love if you rebase against latest tip:perf/core as you
> know better than me what has been applied and what hasn't.

I'll try to do it now.

- Arnaldo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 6/8] perf tools: Add new comm infrastructure
  2013-10-28 12:43                     ` Arnaldo Carvalho de Melo
@ 2013-10-28 14:29                       ` Arnaldo Carvalho de Melo
  2013-10-28 16:05                         ` Frederic Weisbecker
  0 siblings, 1 reply; 91+ messages in thread
From: Arnaldo Carvalho de Melo @ 2013-10-28 14:29 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Namhyung Kim, David Ahern, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds, Jiri Olsa,
	Ingo Molnar, Stephane Eranian

Em Mon, Oct 28, 2013 at 09:43:09AM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Mon, Oct 28, 2013 at 11:12:50AM +0100, Frederic Weisbecker escreveu:
> > On Mon, Oct 28, 2013 at 06:15:26PM +0900, Namhyung Kim wrote:
> > > On Mon, 28 Oct 2013 10:09:46 +0100, Frederic Weisbecker wrote:
> > > > Ah cool! Could you please remind me the name of that branch so that I
> > > > can do some tests and work on top of it?

> > >   git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git
> > >     perf/callchain-v5

> > Hmm, I tried to rebase on top of tip:perf/core and git is getting confused I guess with
> > which is applied and which is out of tree. There have been a lot of changes since
> > then and thus quite some conflicts.
> > 
> > If you don't mind, I would love if you rebase against latest tip:perf/core as you
> > know better than me what has been applied and what hasn't.
 
> I'll try to do it now.

Can you please try with:

	git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git
	  perf/comm

And share your results? I fixed up everything wrt recent flux in my
perf/core branch, did just basic testing here, but so far it looks ok
(and fast!).

- Arnaldo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* [tip:perf/urgent] perf script python: Fix mem leak due to missing Py_DECREFs on dict entries
  2013-10-11  5:15 ` [PATCH 3/8] perf tools: Show progress on histogram collapsing Namhyung Kim
  2013-10-25 10:33   ` [tip:perf/core] " tip-bot for Namhyung Kim
@ 2013-10-28 15:03   ` tip-bot for Joseph Schuchart
  1 sibling, 0 replies; 91+ messages in thread
From: tip-bot for Joseph Schuchart @ 2013-10-28 15:03 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: acme, linux-kernel, paulus, hpa, mingo, a.p.zijlstra, tom.zanussi,
	joseph.schuchart, tglx

Commit-ID:  c0268e8d1f450e286fc55e77f53a9ede6b72acab
Gitweb:     http://git.kernel.org/tip/c0268e8d1f450e286fc55e77f53a9ede6b72acab
Author:     Joseph Schuchart <joseph.schuchart@tu-dresden.de>
AuthorDate: Thu, 24 Oct 2013 10:10:51 -0300
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Thu, 24 Oct 2013 10:16:54 -0300

perf script python: Fix mem leak due to missing Py_DECREFs on dict entries

We are using the Python scripting interface in perf to extract kernel
events relevant for performance analysis of HPC codes. We noticed that
the "perf script" call allocates a significant amount of memory (in the
order of several 100 MiB) during it's run, e.g. 125 MiB for a 25 MiB
input file:

  $> perf record -o perf.data -a -R -g fp \
       -e power:cpu_frequency -e sched:sched_switch \
       -e sched:sched_migrate_task -e sched:sched_process_exit \
       -e sched:sched_process_fork -e sched:sched_process_exec \
       -e cycles  -m 4096 --freq 4000
  $> /usr/bin/time perf script -i perf.data -s dummy_script.py
  0.84user 0.13system 0:01.92elapsed 51%CPU (0avgtext+0avgdata
  125532maxresident)k
  73072inputs+0outputs (57major+33086minor)pagefaults 0swaps

Upon further investigation using the valgrind massif tool, we noticed
that Python objects that are created in trace-event-python.c via
PyString_FromString*() (and their Integer and Long counterparts) are
never free'd.

The reason for this seem to be missing Py_DECREF calls on the objects
that are returned by these functions and stored in the Python
dictionaries. The Python dictionaries do not steal references (as
opposed to Python tuples and lists) but instead add their own reference.

Hence, the reference that is returned by these object creation functions
is never released and the memory is leaked. (see [1,2])

The attached patch fixes this by wrapping all relevant calls to
PyDict_SetItemString() and decrementing the reference counter
immediately after the Python function call.

This reduces the allocated memory to a reasonable amount:

  $> /usr/bin/time perf script -i perf.data -s dummy_script.py
  0.73user 0.05system 0:00.79elapsed 99%CPU (0avgtext+0avgdata
  49132maxresident)k
  0inputs+0outputs (0major+14045minor)pagefaults 0swaps

For comparison, with a 120 MiB input file the memory consumption
reported by time drops from almost 600 MiB to 146 MiB.

The patch has been tested using Linux 3.8.2 with Python 2.7.4 and Linux
3.11.6 with Python 2.7.5.

Please let me know if you need any further information.

[1] http://docs.python.org/2/c-api/tuple.html#PyTuple_SetItem
[2] http://docs.python.org/2/c-api/dict.html#PyDict_SetItemString

Signed-off-by: Joseph Schuchart <joseph.schuchart@tu-dresden.de>
Reviewed-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Tom Zanussi <tom.zanussi@linux.intel.com>
Link: http://lkml.kernel.org/r/1381468543-25334-4-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../util/scripting-engines/trace-event-python.c    | 37 ++++++++++++++--------
 1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index cc75a3c..95d91a0 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -56,6 +56,17 @@ static void handler_call_die(const char *handler_name)
 	Py_FatalError("problem in Python trace event handler");
 }
 
+/*
+ * Insert val into into the dictionary and decrement the reference counter.
+ * This is necessary for dictionaries since PyDict_SetItemString() does not 
+ * steal a reference, as opposed to PyTuple_SetItem().
+ */
+static void pydict_set_item_string_decref(PyObject *dict, const char *key, PyObject *val)
+{
+	PyDict_SetItemString(dict, key, val);
+	Py_DECREF(val);
+}
+
 static void define_value(enum print_arg_type field_type,
 			 const char *ev_name,
 			 const char *field_name,
@@ -279,11 +290,11 @@ static void python_process_tracepoint(union perf_event *perf_event
 		PyTuple_SetItem(t, n++, PyInt_FromLong(pid));
 		PyTuple_SetItem(t, n++, PyString_FromString(comm));
 	} else {
-		PyDict_SetItemString(dict, "common_cpu", PyInt_FromLong(cpu));
-		PyDict_SetItemString(dict, "common_s", PyInt_FromLong(s));
-		PyDict_SetItemString(dict, "common_ns", PyInt_FromLong(ns));
-		PyDict_SetItemString(dict, "common_pid", PyInt_FromLong(pid));
-		PyDict_SetItemString(dict, "common_comm", PyString_FromString(comm));
+		pydict_set_item_string_decref(dict, "common_cpu", PyInt_FromLong(cpu));
+		pydict_set_item_string_decref(dict, "common_s", PyInt_FromLong(s));
+		pydict_set_item_string_decref(dict, "common_ns", PyInt_FromLong(ns));
+		pydict_set_item_string_decref(dict, "common_pid", PyInt_FromLong(pid));
+		pydict_set_item_string_decref(dict, "common_comm", PyString_FromString(comm));
 	}
 	for (field = event->format.fields; field; field = field->next) {
 		if (field->flags & FIELD_IS_STRING) {
@@ -313,7 +324,7 @@ static void python_process_tracepoint(union perf_event *perf_event
 		if (handler)
 			PyTuple_SetItem(t, n++, obj);
 		else
-			PyDict_SetItemString(dict, field->name, obj);
+			pydict_set_item_string_decref(dict, field->name, obj);
 
 	}
 	if (!handler)
@@ -370,21 +381,21 @@ static void python_process_general_event(union perf_event *perf_event
 	if (!handler || !PyCallable_Check(handler))
 		goto exit;
 
-	PyDict_SetItemString(dict, "ev_name", PyString_FromString(perf_evsel__name(evsel)));
-	PyDict_SetItemString(dict, "attr", PyString_FromStringAndSize(
+	pydict_set_item_string_decref(dict, "ev_name", PyString_FromString(perf_evsel__name(evsel)));
+	pydict_set_item_string_decref(dict, "attr", PyString_FromStringAndSize(
 			(const char *)&evsel->attr, sizeof(evsel->attr)));
-	PyDict_SetItemString(dict, "sample", PyString_FromStringAndSize(
+	pydict_set_item_string_decref(dict, "sample", PyString_FromStringAndSize(
 			(const char *)sample, sizeof(*sample)));
-	PyDict_SetItemString(dict, "raw_buf", PyString_FromStringAndSize(
+	pydict_set_item_string_decref(dict, "raw_buf", PyString_FromStringAndSize(
 			(const char *)sample->raw_data, sample->raw_size));
-	PyDict_SetItemString(dict, "comm",
+	pydict_set_item_string_decref(dict, "comm",
 			PyString_FromString(thread->comm));
 	if (al->map) {
-		PyDict_SetItemString(dict, "dso",
+		pydict_set_item_string_decref(dict, "dso",
 			PyString_FromString(al->map->dso->name));
 	}
 	if (al->sym) {
-		PyDict_SetItemString(dict, "symbol",
+		pydict_set_item_string_decref(dict, "symbol",
 			PyString_FromString(al->sym->name));
 	}
 

^ permalink raw reply related	[flat|nested] 91+ messages in thread

* Re: [PATCH 6/8] perf tools: Add new comm infrastructure
  2013-10-28 14:29                       ` Arnaldo Carvalho de Melo
@ 2013-10-28 16:05                         ` Frederic Weisbecker
  2013-10-28 17:01                           ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 91+ messages in thread
From: Frederic Weisbecker @ 2013-10-28 16:05 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Namhyung Kim, David Ahern, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds, Jiri Olsa,
	Ingo Molnar, Stephane Eranian

On Mon, Oct 28, 2013 at 11:29:12AM -0300, Arnaldo Carvalho de Melo wrote:
> Em Mon, Oct 28, 2013 at 09:43:09AM -0300, Arnaldo Carvalho de Melo escreveu:
> > Em Mon, Oct 28, 2013 at 11:12:50AM +0100, Frederic Weisbecker escreveu:
> > > On Mon, Oct 28, 2013 at 06:15:26PM +0900, Namhyung Kim wrote:
> > > > On Mon, 28 Oct 2013 10:09:46 +0100, Frederic Weisbecker wrote:
> > > > > Ah cool! Could you please remind me the name of that branch so that I
> > > > > can do some tests and work on top of it?
> 
> > > >   git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git
> > > >     perf/callchain-v5
> 
> > > Hmm, I tried to rebase on top of tip:perf/core and git is getting confused I guess with
> > > which is applied and which is out of tree. There have been a lot of changes since
> > > then and thus quite some conflicts.
> > > 
> > > If you don't mind, I would love if you rebase against latest tip:perf/core as you
> > > know better than me what has been applied and what hasn't.
>  
> > I'll try to do it now.
> 
> Can you please try with:
> 
> 	git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git
> 	  perf/comm
> 
> And share your results? I fixed up everything wrt recent flux in my
> perf/core branch, did just basic testing here, but so far it looks ok
> (and fast!).

Ok I tried perf report, perf script and perf top and it looks good.
In fact it looks better than the tip tree. I fixes some bugs that I can
see in tip:/perf/core:

     $ perf record perf bench sched messaging
     $ perf report --stdio -s comm
     write failure on standard output: Bad file descriptor

Not sure where that comes from, but it's fixed in your tree. May be that's
on the fixes before the comm patches in your tree.

Also it differentiate between pre-exec and post-fork events, which looks
more precise (and it fixes some comm mangling as well):


Before:

     # Overhead          Command
     # ........  ...............
     #
       100.00%  sched-me

After:

     # Overhead          Command
     # ........  ...............
     #
        99.89%  sched-messaging
        0.11%             perf


Thanks!

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 6/8] perf tools: Add new comm infrastructure
  2013-10-28 16:05                         ` Frederic Weisbecker
@ 2013-10-28 17:01                           ` Arnaldo Carvalho de Melo
  2013-10-28 17:48                             ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 91+ messages in thread
From: Arnaldo Carvalho de Melo @ 2013-10-28 17:01 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Namhyung Kim, David Ahern, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds, Jiri Olsa,
	Ingo Molnar, Stephane Eranian

Em Mon, Oct 28, 2013 at 05:05:30PM +0100, Frederic Weisbecker escreveu:
> On Mon, Oct 28, 2013 at 11:29:12AM -0300, Arnaldo Carvalho de Melo wrote:
> > Em Mon, Oct 28, 2013 at 09:43:09AM -0300, Arnaldo Carvalho de Melo escreveu:
> > > Em Mon, Oct 28, 2013 at 11:12:50AM +0100, Frederic Weisbecker escreveu:
> > > > On Mon, Oct 28, 2013 at 06:15:26PM +0900, Namhyung Kim wrote:
> > > > > On Mon, 28 Oct 2013 10:09:46 +0100, Frederic Weisbecker wrote:
> > > > > > Ah cool! Could you please remind me the name of that branch so that I
> > > > > > can do some tests and work on top of it?
> > 
> > > > >   git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git
> > > > >     perf/callchain-v5
> > 
> > > > Hmm, I tried to rebase on top of tip:perf/core and git is getting confused I guess with
> > > > which is applied and which is out of tree. There have been a lot of changes since
> > > > then and thus quite some conflicts.
> > > > 
> > > > If you don't mind, I would love if you rebase against latest tip:perf/core as you
> > > > know better than me what has been applied and what hasn't.
> >  
> > > I'll try to do it now.
> > 
> > Can you please try with:
> > 
> > 	git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git
> > 	  perf/comm
> > 
> > And share your results? I fixed up everything wrt recent flux in my
> > perf/core branch, did just basic testing here, but so far it looks ok
> > (and fast!).
> 
> Ok I tried perf report, perf script and perf top and it looks good.
> In fact it looks better than the tip tree. I fixes some bugs that I can

tip.tip has the borked disabling of MMAP2 that may explain some of the
problems you described here

> see in tip:/perf/core:
> 
>      $ perf record perf bench sched messaging
>      $ perf report --stdio -s comm
>      write failure on standard output: Bad file descriptor
> 
> Not sure where that comes from, but it's fixed in your tree. May be that's
> on the fixes before the comm patches in your tree.
> 
> Also it differentiate between pre-exec and post-fork events, which looks
> more precise (and it fixes some comm mangling as well):
> 
> 
> Before:
> 
>      # Overhead          Command
>      # ........  ...............
>      #
>        100.00%  sched-me

I noticed the above, just on the --stdio tho, checking why this is so...
Can you try without --stdio, even using --tui?

- Arnaldo

> 
> After:
> 
>      # Overhead          Command
>      # ........  ...............
>      #
>         99.89%  sched-messaging
>         0.11%             perf
> 
> 
> Thanks!

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 1/4] perf tools: Split -g and --call-graph for record command
  2013-10-27 15:30                           ` David Ahern
@ 2013-10-28 17:46                             ` Arnaldo Carvalho de Melo
  2013-10-28 18:20                               ` David Ahern
  0 siblings, 1 reply; 91+ messages in thread
From: Arnaldo Carvalho de Melo @ 2013-10-28 17:46 UTC (permalink / raw)
  To: David Ahern
  Cc: Jiri Olsa, linux-kernel, Corey Ashford, Ingo Molnar, Namhyung Kim,
	Paul Mackerras, Peter Zijlstra, Andi Kleen, Adrian Hunter

Em Sun, Oct 27, 2013 at 09:30:06AM -0600, David Ahern escreveu:
> On 10/26/13 8:25 AM, Jiri Olsa wrote:
> >diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
> >index f10ab63..7be62770 100644
> >--- a/tools/perf/Documentation/perf-record.txt
> >+++ b/tools/perf/Documentation/perf-record.txt
> >@@ -92,8 +92,12 @@ OPTIONS
> >  	size is rounded up to have nearest pages power of two value.
> >
> >  -g::
> >+	Enables call-graph (stack chain/backtrace) recording.
> >+
> >  --call-graph::
> >-	Do call-graph (stack chain/backtrace) recording.
> >+	Setup and enable call-graph (stack chain/backtrace) recording,
> >+	implies -g.
> >+
> 
> This needs some more words as to why it is used over -g. It is also
> missing the options that can be given (fp or dwarf).

Added this:

--call-graph::
        Setup and enable call-graph (stack chain/backtrace) recording,
        implies -g.

        Allows specifying "fp" (frame pointer) or "dwarf"
        (DWARF's CFI - Call Frame Information) as the method to collect
        the information used to show the call graphs.

        In some systems, where binaries are build wit gcc
        --fomit-frame-pointer, using the "fp" method will produce bogus
        call graphs, using "dwarf", if available (perf tools linked to
        the libunwind library) should be used instead.

 
> ---8<---
> 
> >@@ -825,9 +851,12 @@ const struct option record_options[] = {
> >  		     perf_evlist__parse_mmap_pages),
> >  	OPT_BOOLEAN(0, "group", &record.opts.group,
> >  		    "put the counters into a counter group"),
> >-	OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
> >-			     "mode[,dump_size]", record_callchain_help,
> >-			     &record_parse_callchain_opt, "fp"),
> >+	OPT_CALLBACK(0, "call-graph", &record.opts,
> >+		     "mode[,dump_size]", record_callchain_help,
> >+		     &record_parse_callchain_opt),
> >+	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
> >+			   NULL, "enables call-graph recording" ,
> >+			   &record_callchain_opt),
> 
> From a user/readability perspective I would prefer the order to be
> -g then --call-graph especially since --call-graph is like an
> advanced -g where you get more control over the method used.

Reordered as suggested.
 
> Other than that:
> 
> Tested-Reviewed-by: David Ahern <dsahern@gmail.com>

Thanks, adding those two.

And reworking it to make it apply against my perf/urgent branch, trying to fix
this annoyance on this merge window, as suggested by Ingo.

- Arnaldo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 6/8] perf tools: Add new comm infrastructure
  2013-10-28 17:01                           ` Arnaldo Carvalho de Melo
@ 2013-10-28 17:48                             ` Arnaldo Carvalho de Melo
  2013-10-29  9:20                               ` Frederic Weisbecker
  0 siblings, 1 reply; 91+ messages in thread
From: Arnaldo Carvalho de Melo @ 2013-10-28 17:48 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Namhyung Kim, David Ahern, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds, Jiri Olsa,
	Ingo Molnar, Stephane Eranian

Em Mon, Oct 28, 2013 at 02:01:11PM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Mon, Oct 28, 2013 at 05:05:30PM +0100, Frederic Weisbecker escreveu:
> > 
> > Also it differentiate between pre-exec and post-fork events, which looks
> > more precise (and it fixes some comm mangling as well):
> > 
> > 
> > Before:
> > 
> >      # Overhead          Command
> >      # ........  ...............
> >      #
> >        100.00%  sched-me
> 
> I noticed the above, just on the --stdio tho, checking why this is so...
> Can you try without --stdio, even using --tui?

Thanks for this report, if you try my perf/urgent now it should be
fixed, was a problem fixed by Jiri Olsa that I thought affected only
perf/core.

I cherry picked it and it fixed the problem for me, can you check that?

- Arnaldo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 1/4] perf tools: Split -g and --call-graph for record command
  2013-10-28 17:46                             ` Arnaldo Carvalho de Melo
@ 2013-10-28 18:20                               ` David Ahern
  2013-10-29  5:13                                 ` Namhyung Kim
  0 siblings, 1 reply; 91+ messages in thread
From: David Ahern @ 2013-10-28 18:20 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Jiri Olsa, linux-kernel, Corey Ashford, Ingo Molnar, Namhyung Kim,
	Paul Mackerras, Peter Zijlstra, Andi Kleen, Adrian Hunter

On 10/28/13 11:46 AM, Arnaldo Carvalho de Melo wrote:
> Added this:
>
> --call-graph::
>          Setup and enable call-graph (stack chain/backtrace) recording,
>          implies -g.
>
>          Allows specifying "fp" (frame pointer) or "dwarf"
>          (DWARF's CFI - Call Frame Information) as the method to collect
>          the information used to show the call graphs.
>
>          In some systems, where binaries are build wit gcc

s/wit/with/

David

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 1/4] perf tools: Split -g and --call-graph for record command
  2013-10-28 18:20                               ` David Ahern
@ 2013-10-29  5:13                                 ` Namhyung Kim
  0 siblings, 0 replies; 91+ messages in thread
From: Namhyung Kim @ 2013-10-29  5:13 UTC (permalink / raw)
  To: David Ahern
  Cc: Arnaldo Carvalho de Melo, Jiri Olsa, linux-kernel, Corey Ashford,
	Ingo Molnar, Paul Mackerras, Peter Zijlstra, Andi Kleen,
	Adrian Hunter

On Mon, 28 Oct 2013 12:20:53 -0600, David Ahern wrote:
> On 10/28/13 11:46 AM, Arnaldo Carvalho de Melo wrote:
>> Added this:
>>
>> --call-graph::
>>          Setup and enable call-graph (stack chain/backtrace) recording,
>>          implies -g.
>>
>>          Allows specifying "fp" (frame pointer) or "dwarf"
>>          (DWARF's CFI - Call Frame Information) as the method to collect
>>          the information used to show the call graphs.
>>
>>          In some systems, where binaries are build wit gcc
>
> s/wit/with/

And also forgot to mention user stack dump size (and its default size)
in case of "dwarf".

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 91+ messages in thread

* [tip:perf/urgent] perf record: Split -g and --call-graph
  2013-10-26 14:25                         ` [PATCH 1/4] perf tools: Split -g and --call-graph for record command Jiri Olsa
  2013-10-27 15:30                           ` David Ahern
  2013-10-28  7:59                           ` Namhyung Kim
@ 2013-10-29  8:22                           ` tip-bot for Jiri Olsa
  2 siblings, 0 replies; 91+ messages in thread
From: tip-bot for Jiri Olsa @ 2013-10-29  8:22 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: acme, linux-kernel, paulus, hpa, mingo, andi, a.p.zijlstra,
	namhyung, jolsa, dsahern, adrian.hunter, tglx, cjashfor, mingo

Commit-ID:  09b0fd45ff63413df94cbd832a765076b201edbb
Gitweb:     http://git.kernel.org/tip/09b0fd45ff63413df94cbd832a765076b201edbb
Author:     Jiri Olsa <jolsa@redhat.com>
AuthorDate: Sat, 26 Oct 2013 16:25:33 +0200
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Mon, 28 Oct 2013 16:05:59 -0300

perf record: Split -g and --call-graph

Splitting -g and --call-graph for record command, so we could use '-g'
with no option.

The '-g' option now takes NO argument and enables the configured unwind
method, which is currently the frame pointers method.

It will be possible to configure unwind method via config file in
upcoming patches.

All current '-g' arguments is overtaken by --call-graph option.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: David Ahern <dsahern@gmail.com>
Tested-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: David Ahern <dsahern@gmail.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1382797536-32303-2-git-send-email-jolsa@redhat.com
[ reordered -g/--call-graph on --help and expanded the man page
  according to comments by David Ahern and Namhyung Kim ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-record.txt | 14 +++++-
 tools/perf/builtin-record.c              | 73 ++++++++++++++++++++++----------
 tools/perf/util/callchain.h              |  3 ++
 3 files changed, 67 insertions(+), 23 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index e297b74..ca0d3d9 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -90,8 +90,20 @@ OPTIONS
 	Number of mmap data pages. Must be a power of two.
 
 -g::
+	Enables call-graph (stack chain/backtrace) recording.
+
 --call-graph::
-	Do call-graph (stack chain/backtrace) recording.
+	Setup and enable call-graph (stack chain/backtrace) recording,
+	implies -g.
+
+	Allows specifying "fp" (frame pointer) or "dwarf"
+	(DWARF's CFI - Call Frame Information) as the method to collect
+	the information used to show the call graphs.
+
+	In some systems, where binaries are build with gcc
+	--fomit-frame-pointer, using the "fp" method will produce bogus
+	call graphs, using "dwarf", if available (perf tools linked to
+	the libunwind library) should be used instead.
 
 -q::
 --quiet::
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index a41ac415..d046514 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -712,21 +712,12 @@ static int get_stack_size(char *str, unsigned long *_size)
 }
 #endif /* LIBUNWIND_SUPPORT */
 
-int record_parse_callchain_opt(const struct option *opt,
-			       const char *arg, int unset)
+int record_parse_callchain(const char *arg, struct perf_record_opts *opts)
 {
-	struct perf_record_opts *opts = opt->value;
 	char *tok, *name, *saveptr = NULL;
 	char *buf;
 	int ret = -1;
 
-	/* --no-call-graph */
-	if (unset)
-		return 0;
-
-	/* We specified default option if none is provided. */
-	BUG_ON(!arg);
-
 	/* We need buffer that we know we can write to. */
 	buf = malloc(strlen(arg) + 1);
 	if (!buf)
@@ -764,13 +755,9 @@ int record_parse_callchain_opt(const struct option *opt,
 				ret = get_stack_size(tok, &size);
 				opts->stack_dump_size = size;
 			}
-
-			if (!ret)
-				pr_debug("callchain: stack dump size %d\n",
-					 opts->stack_dump_size);
 #endif /* LIBUNWIND_SUPPORT */
 		} else {
-			pr_err("callchain: Unknown -g option "
+			pr_err("callchain: Unknown --call-graph option "
 			       "value: %s\n", arg);
 			break;
 		}
@@ -778,13 +765,52 @@ int record_parse_callchain_opt(const struct option *opt,
 	} while (0);
 
 	free(buf);
+	return ret;
+}
+
+static void callchain_debug(struct perf_record_opts *opts)
+{
+	pr_debug("callchain: type %d\n", opts->call_graph);
 
+	if (opts->call_graph == CALLCHAIN_DWARF)
+		pr_debug("callchain: stack dump size %d\n",
+			 opts->stack_dump_size);
+}
+
+int record_parse_callchain_opt(const struct option *opt,
+			       const char *arg,
+			       int unset)
+{
+	struct perf_record_opts *opts = opt->value;
+	int ret;
+
+	/* --no-call-graph */
+	if (unset) {
+		opts->call_graph = CALLCHAIN_NONE;
+		pr_debug("callchain: disabled\n");
+		return 0;
+	}
+
+	ret = record_parse_callchain(arg, opts);
 	if (!ret)
-		pr_debug("callchain: type %d\n", opts->call_graph);
+		callchain_debug(opts);
 
 	return ret;
 }
 
+int record_callchain_opt(const struct option *opt,
+			 const char *arg __maybe_unused,
+			 int unset __maybe_unused)
+{
+	struct perf_record_opts *opts = opt->value;
+
+	if (opts->call_graph == CALLCHAIN_NONE)
+		opts->call_graph = CALLCHAIN_FP;
+
+	callchain_debug(opts);
+	return 0;
+}
+
 static const char * const record_usage[] = {
 	"perf record [<options>] [<command>]",
 	"perf record [<options>] -- <command> [<options>]",
@@ -813,12 +839,12 @@ static struct perf_record record = {
 	},
 };
 
-#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "
+#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "
 
 #ifdef LIBUNWIND_SUPPORT
-const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
+const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
 #else
-const char record_callchain_help[] = CALLCHAIN_HELP "[fp]";
+const char record_callchain_help[] = CALLCHAIN_HELP "fp";
 #endif
 
 /*
@@ -858,9 +884,12 @@ const struct option record_options[] = {
 		     "number of mmap data pages"),
 	OPT_BOOLEAN(0, "group", &record.opts.group,
 		    "put the counters into a counter group"),
-	OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
-			     "mode[,dump_size]", record_callchain_help,
-			     &record_parse_callchain_opt, "fp"),
+	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
+			   NULL, "enables call-graph recording" ,
+			   &record_callchain_opt),
+	OPT_CALLBACK(0, "call-graph", &record.opts,
+		     "mode[,dump_size]", record_callchain_help,
+		     &record_parse_callchain_opt),
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show counter open errors, etc)"),
 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 2b585bc..9e99060 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -147,6 +147,9 @@ static inline void callchain_cursor_advance(struct callchain_cursor *cursor)
 
 struct option;
 
+int record_parse_callchain(const char *arg, struct perf_record_opts *opts);
 int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset);
+int record_callchain_opt(const struct option *opt, const char *arg, int unset);
+
 extern const char record_callchain_help[];
 #endif	/* __PERF_CALLCHAIN_H */

^ permalink raw reply related	[flat|nested] 91+ messages in thread

* [tip:perf/urgent] perf top: Split -G and --call-graph
  2013-10-26 14:25                         ` [PATCH 2/4] perf tools: Split -G and --call-graph for top command Jiri Olsa
  2013-10-27 15:34                           ` David Ahern
@ 2013-10-29  8:22                           ` tip-bot for Jiri Olsa
  1 sibling, 0 replies; 91+ messages in thread
From: tip-bot for Jiri Olsa @ 2013-10-29  8:22 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: acme, linux-kernel, paulus, hpa, mingo, andi, a.p.zijlstra,
	namhyung, jolsa, dsahern, adrian.hunter, tglx, cjashfor, mingo

Commit-ID:  ae779a630977d93fbebfa06216ea47df5b5c62c8
Gitweb:     http://git.kernel.org/tip/ae779a630977d93fbebfa06216ea47df5b5c62c8
Author:     Jiri Olsa <jolsa@redhat.com>
AuthorDate: Sat, 26 Oct 2013 16:25:34 +0200
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Mon, 28 Oct 2013 16:06:00 -0300

perf top: Split -G and --call-graph

Splitting -G and --call-graph for record command, so we could use '-G'
with no option.

The '-G' option now takes NO argument and enables the configured unwind
method, which is currently the frame pointers method.

It will be possible to configure unwind method via config file in
upcoming patches.

All current '-G' arguments is overtaken by --call-graph option.

NOTE: The documentation for top --call-graph option
      was wrongly copied from report command.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: David Ahern <dsahern@gmail.com>
Tested-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: David Ahern <dsahern@gmail.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1382797536-32303-3-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-top.txt | 18 +++++-------------
 tools/perf/builtin-top.c              | 23 +++++++++++++----------
 2 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 58d6598..6a118e7 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -140,20 +140,12 @@ Default is to monitor all CPUS.
 --asm-raw::
 	Show raw instruction encoding of assembly instructions.
 
--G [type,min,order]::
+-G::
+	Enables call-graph (stack chain/backtrace) recording.
+
 --call-graph::
-        Display call chains using type, min percent threshold and order.
-	type can be either:
-	- flat: single column, linear exposure of call chains.
-	- graph: use a graph tree, displaying absolute overhead rates.
-	- fractal: like graph, but displays relative rates. Each branch of
-		 the tree is considered as a new profiled object.
-
-	order can be either:
-	- callee: callee based call graph.
-	- caller: inverted caller based call graph.
-
-	Default: fractal,0.5,callee.
+	Setup and enable call-graph (stack chain/backtrace) recording,
+	implies -G.
 
 --ignore-callees=<regex>::
         Ignore callees of the function(s) matching the given regex.
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 2122141..0df298a 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1016,16 +1016,16 @@ out_delete:
 }
 
 static int
-parse_callchain_opt(const struct option *opt, const char *arg, int unset)
+callchain_opt(const struct option *opt, const char *arg, int unset)
 {
-	/*
-	 * --no-call-graph
-	 */
-	if (unset)
-		return 0;
-
 	symbol_conf.use_callchain = true;
+	return record_callchain_opt(opt, arg, unset);
+}
 
+static int
+parse_callchain_opt(const struct option *opt, const char *arg, int unset)
+{
+	symbol_conf.use_callchain = true;
 	return record_parse_callchain_opt(opt, arg, unset);
 }
 
@@ -1106,9 +1106,12 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 		   "sort by key(s): pid, comm, dso, symbol, parent, weight, local_weight"),
 	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
 		    "Show a column with the number of samples"),
-	OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
-			     "mode[,dump_size]", record_callchain_help,
-			     &parse_callchain_opt, "fp"),
+	OPT_CALLBACK_NOOPT('G', NULL, &top.record_opts,
+			   NULL, "enables call-graph recording",
+			   &callchain_opt),
+	OPT_CALLBACK(0, "call-graph", &top.record_opts,
+		     "mode[,dump_size]", record_callchain_help,
+		     &parse_callchain_opt),
 	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
 		   "ignore callees of these functions in call graphs",
 		   report_parse_ignore_callees_opt),

^ permalink raw reply related	[flat|nested] 91+ messages in thread

* Re: [PATCH 6/8] perf tools: Add new comm infrastructure
  2013-10-28 17:48                             ` Arnaldo Carvalho de Melo
@ 2013-10-29  9:20                               ` Frederic Weisbecker
  2013-10-29 13:06                                 ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 91+ messages in thread
From: Frederic Weisbecker @ 2013-10-29  9:20 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Namhyung Kim, David Ahern, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds, Jiri Olsa,
	Ingo Molnar, Stephane Eranian

On Mon, Oct 28, 2013 at 02:48:43PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Mon, Oct 28, 2013 at 02:01:11PM -0300, Arnaldo Carvalho de Melo escreveu:
> > Em Mon, Oct 28, 2013 at 05:05:30PM +0100, Frederic Weisbecker escreveu:
> > > 
> > > Also it differentiate between pre-exec and post-fork events, which looks
> > > more precise (and it fixes some comm mangling as well):
> > > 
> > > 
> > > Before:
> > > 
> > >      # Overhead          Command
> > >      # ........  ...............
> > >      #
> > >        100.00%  sched-me
> > 
> > I noticed the above, just on the --stdio tho, checking why this is so...
> > Can you try without --stdio, even using --tui?
> 
> Thanks for this report, if you try my perf/urgent now it should be
> fixed, was a problem fixed by Jiri Olsa that I thought affected only
> perf/core.
> 
> I cherry picked it and it fixed the problem for me, can you check that?

Yep, I checked your perf/urgent queue 8e50d384cc1d5afd2989cf0f7093756ed7164eb2 and it's
fixed there, although I can't find the particular patch fixing this, but it works :)

Thanks.

> 
> - Arnaldo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 3/4] perf tools: Add call-graph option support into .perfconfig
  2013-10-28  8:10                           ` Namhyung Kim
@ 2013-10-29 10:18                             ` Jiri Olsa
  2013-10-29 12:43                               ` Arnaldo Carvalho de Melo
  2013-11-01 15:20                               ` Jiri Olsa
  0 siblings, 2 replies; 91+ messages in thread
From: Jiri Olsa @ 2013-10-29 10:18 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: linux-kernel, Corey Ashford, David Ahern, Ingo Molnar,
	Paul Mackerras, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Andi Kleen, Adrian Hunter

On Mon, Oct 28, 2013 at 05:10:54PM +0900, Namhyung Kim wrote:
> On Sat, 26 Oct 2013 16:25:35 +0200, Jiri Olsa wrote:
> > Adding call-graph option support into .perfconfig file,
> > so it's now possible use call-graph option like:
> >
> >   [top]
> >         call-graph = fp
> >
> >   [record]
> >         call-graph = dwarf,8192
> >
> > Above options ONLY setup the unwind method. To enable
> > perf record/top to actually use it the command line
> > option -g/-G must be specified.
> >
> > The --call-graph option overloads .perfconfig setup.
> >
> > Assuming above configuration:
> >
> >   $ perf record -g ls
> >   - enables dwarf unwind with user stack size dump 8192 bytes
> >
> >   $ perf top -G
> >   - enables frame pointer unwind
> >
> >   $ perf record --call-graph=fp ls
> >   - enables frame pointer unwind
> >
> >   $ perf top --call-graph=dwarf,4096 ls
> >   - enables dwarf unwind with user stack size dump 4096 bytes
> >
> [SNIP]
> > --- a/tools/perf/builtin-record.c
> > +++ b/tools/perf/builtin-record.c
> > @@ -750,6 +750,8 @@ int record_parse_callchain_opt(const struct option *opt,
> >  	struct perf_record_opts *opts = opt->value;
> >  	int ret;
> >  
> > +	opts->call_graph_enabled = !unset;
> > +
> 
> Why not just using symbol_conf.use_callchain?
> 

right, this way we'd be in sync with top

jirka

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 1/4] perf tools: Split -g and --call-graph for record command
  2013-10-28  7:59                           ` Namhyung Kim
@ 2013-10-29 10:18                             ` Jiri Olsa
  2013-10-29 12:42                               ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 91+ messages in thread
From: Jiri Olsa @ 2013-10-29 10:18 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: linux-kernel, Corey Ashford, David Ahern, Ingo Molnar,
	Paul Mackerras, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Andi Kleen, Adrian Hunter

On Mon, Oct 28, 2013 at 04:59:16PM +0900, Namhyung Kim wrote:
> Hi Jiri,
> 
> On Sat, 26 Oct 2013 16:25:33 +0200, Jiri Olsa wrote:
> > Splitting -g and --call-graph for record command, so we could
> > use '-g' with no option.
> >
> > The '-g' option now takes NO argument and enables the configured
> > unwind method, which is currently the frame pointers method.
> >
> > It will be possible to configure unwind method via config
> > file in upcoming patches.
> >
> > All current '-g' arguments is overtaken by --call-graph option.
> 
>   $ perf record --call-graph none -- true
>   callchain: Unknown -g option value: none
> 
> It should be
> 
>   callchain: Unknown --call-graph option value: none

ok

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 0/4] perf tools: Fix -g option handling
  2013-10-27  6:56                           ` Ingo Molnar
@ 2013-10-29 10:21                             ` Jiri Olsa
  2013-10-29 10:25                               ` Ingo Molnar
  0 siblings, 1 reply; 91+ messages in thread
From: Jiri Olsa @ 2013-10-29 10:21 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, Corey Ashford, David Ahern, Ingo Molnar,
	Namhyung Kim, Paul Mackerras, Peter Zijlstra,
	Arnaldo Carvalho de Melo, Andi Kleen, Adrian Hunter

On Sun, Oct 27, 2013 at 07:56:05AM +0100, Ingo Molnar wrote:
> 
> * Jiri Olsa <jolsa@redhat.com> wrote:
> 
> > On Sat, Oct 26, 2013 at 04:25:32PM +0200, Jiri Olsa wrote:
> > > hi,
> > > changing the '-g/-G' options for record/top commands
> > > to take NO argument and enable unwind method based
> > > on .perfconfig setup (using FP by default).
> > > 
> > > The current -g option parsing moves into the
> > > '--call-graph' option.
> > 
> > forgot to mention branch:
> > 
> >   git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git
> >   perf/cc
> 
> I tested it - works perfectly now for me:
> 
> Acked-and-tested-by: Ingo Molnar <mingo@kernel.org>
> 
> You might want to give --call-graph a single letter option name as well, 
> -g, -G, -c, -C are all taken already, but both -l and -L are still 
> available - maybe use -L for it?

ok, I'll addd the -L
I'll send it with fixies for other comments

thanks,
jirka

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 0/4] perf tools: Fix -g option handling
  2013-10-29 10:21                             ` Jiri Olsa
@ 2013-10-29 10:25                               ` Ingo Molnar
  0 siblings, 0 replies; 91+ messages in thread
From: Ingo Molnar @ 2013-10-29 10:25 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: linux-kernel, Corey Ashford, David Ahern, Ingo Molnar,
	Namhyung Kim, Paul Mackerras, Peter Zijlstra,
	Arnaldo Carvalho de Melo, Andi Kleen, Adrian Hunter


* Jiri Olsa <jolsa@redhat.com> wrote:

> On Sun, Oct 27, 2013 at 07:56:05AM +0100, Ingo Molnar wrote:
> > 
> > * Jiri Olsa <jolsa@redhat.com> wrote:
> > 
> > > On Sat, Oct 26, 2013 at 04:25:32PM +0200, Jiri Olsa wrote:
> > > > hi,
> > > > changing the '-g/-G' options for record/top commands
> > > > to take NO argument and enable unwind method based
> > > > on .perfconfig setup (using FP by default).
> > > > 
> > > > The current -g option parsing moves into the
> > > > '--call-graph' option.
> > > 
> > > forgot to mention branch:
> > > 
> > >   git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git
> > >   perf/cc
> > 
> > I tested it - works perfectly now for me:
> > 
> > Acked-and-tested-by: Ingo Molnar <mingo@kernel.org>
> > 
> > You might want to give --call-graph a single letter option name as well, 
> > -g, -G, -c, -C are all taken already, but both -l and -L are still 
> > available - maybe use -L for it?
> 
> ok, I'll addd the -L
> I'll send it with fixies for other comments

Please do it against tip:perf/core aac898548d04 or later (I just 
pushed it out, it might take a few minutes to propagate), the most 
important parts are in perf/urgent already (which is merged into 
aac898548d04), we can do the rest for v3.13 as delta fixlets.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 1/4] perf tools: Split -g and --call-graph for record command
  2013-10-29 10:18                             ` Jiri Olsa
@ 2013-10-29 12:42                               ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 91+ messages in thread
From: Arnaldo Carvalho de Melo @ 2013-10-29 12:42 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Namhyung Kim, linux-kernel, Corey Ashford, David Ahern,
	Ingo Molnar, Paul Mackerras, Peter Zijlstra, Andi Kleen,
	Adrian Hunter

Em Tue, Oct 29, 2013 at 11:18:50AM +0100, Jiri Olsa escreveu:
> On Mon, Oct 28, 2013 at 04:59:16PM +0900, Namhyung Kim wrote:
> > > All current '-g' arguments is overtaken by --call-graph option.

> >   $ perf record --call-graph none -- true

> > 
> > It should be

> >   callchain: Unknown --call-graph option value: none

> ok

I fixed this one, patch went thru perf/urgent, Ingo already merged.

- Arnaldo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 3/4] perf tools: Add call-graph option support into .perfconfig
  2013-10-29 10:18                             ` Jiri Olsa
@ 2013-10-29 12:43                               ` Arnaldo Carvalho de Melo
  2013-10-29 12:46                                 ` Ingo Molnar
  2013-11-01 15:20                               ` Jiri Olsa
  1 sibling, 1 reply; 91+ messages in thread
From: Arnaldo Carvalho de Melo @ 2013-10-29 12:43 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Namhyung Kim, linux-kernel, Corey Ashford, David Ahern,
	Ingo Molnar, Paul Mackerras, Peter Zijlstra, Andi Kleen,
	Adrian Hunter

Em Tue, Oct 29, 2013 at 11:18:21AM +0100, Jiri Olsa escreveu:
> On Mon, Oct 28, 2013 at 05:10:54PM +0900, Namhyung Kim wrote:
> > > +	opts->call_graph_enabled = !unset;
> > > +
> > 
> > Why not just using symbol_conf.use_callchain?
> > 
> 
> right, this way we'd be in sync with top

Cool, so a patch on top of what is in perf/urgent would be handy, I'll
deal with clashes with what is in perf/core, talking about that I think
I should do a merge sooner...

- ARnaldo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 3/4] perf tools: Add call-graph option support into .perfconfig
  2013-10-29 12:43                               ` Arnaldo Carvalho de Melo
@ 2013-10-29 12:46                                 ` Ingo Molnar
  0 siblings, 0 replies; 91+ messages in thread
From: Ingo Molnar @ 2013-10-29 12:46 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Jiri Olsa, Namhyung Kim, linux-kernel, Corey Ashford, David Ahern,
	Ingo Molnar, Paul Mackerras, Peter Zijlstra, Andi Kleen,
	Adrian Hunter


* Arnaldo Carvalho de Melo <acme@redhat.com> wrote:

> Em Tue, Oct 29, 2013 at 11:18:21AM +0100, Jiri Olsa escreveu:
> > On Mon, Oct 28, 2013 at 05:10:54PM +0900, Namhyung Kim wrote:
> > > > +	opts->call_graph_enabled = !unset;
> > > > +
> > > 
> > > Why not just using symbol_conf.use_callchain?
> > > 
> > 
> > right, this way we'd be in sync with top
> 
> Cool, so a patch on top of what is in perf/urgent would be handy, 
> I'll deal with clashes with what is in perf/core, talking about 
> that I think I should do a merge sooner...

Note that I have resolved those conflicts between perf/urgent and 
perf/core in the latest tip:perf/core tree, please double check the 
conflict resolution and base further work on that if it looks good 
to you.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 6/8] perf tools: Add new comm infrastructure
  2013-10-29  9:20                               ` Frederic Weisbecker
@ 2013-10-29 13:06                                 ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 91+ messages in thread
From: Arnaldo Carvalho de Melo @ 2013-10-29 13:06 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Namhyung Kim, David Ahern, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Namhyung Kim, LKML, Linus Torvalds, Jiri Olsa,
	Ingo Molnar, Stephane Eranian

Em Tue, Oct 29, 2013 at 10:20:33AM +0100, Frederic Weisbecker escreveu:
> On Mon, Oct 28, 2013 at 02:48:43PM -0300, Arnaldo Carvalho de Melo wrote:
> > Em Mon, Oct 28, 2013 at 02:01:11PM -0300, Arnaldo Carvalho de Melo escreveu:
> > > Em Mon, Oct 28, 2013 at 05:05:30PM +0100, Frederic Weisbecker escreveu:
> > > I noticed the above, just on the --stdio tho, checking why this is so...
> > > Can you try without --stdio, even using --tui?

> > Thanks for this report, if you try my perf/urgent now it should be
> > fixed, was a problem fixed by Jiri Olsa that I thought affected only
> > perf/core.

> > I cherry picked it and it fixed the problem for me, can you check that?

> Yep, I checked your perf/urgent queue 8e50d384cc1d5afd2989cf0f7093756ed7164eb2 and it's
> fixed there, although I can't find the particular patch fixing this, but it works :)

This is the one, by Jiri:

https://git.kernel.org/cgit/linux/kernel/git/acme/linux.git/commit/?h=perf/urgent&id=9754c4f9b23d5ce6756514acdf134ad61470734a

- Arnaldo

^ permalink raw reply	[flat|nested] 91+ messages in thread

* Re: [PATCH 3/4] perf tools: Add call-graph option support into .perfconfig
  2013-10-29 10:18                             ` Jiri Olsa
  2013-10-29 12:43                               ` Arnaldo Carvalho de Melo
@ 2013-11-01 15:20                               ` Jiri Olsa
  1 sibling, 0 replies; 91+ messages in thread
From: Jiri Olsa @ 2013-11-01 15:20 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: linux-kernel, Corey Ashford, David Ahern, Ingo Molnar,
	Paul Mackerras, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Andi Kleen, Adrian Hunter

On Tue, Oct 29, 2013 at 11:18:21AM +0100, Jiri Olsa wrote:
> On Mon, Oct 28, 2013 at 05:10:54PM +0900, Namhyung Kim wrote:
> > On Sat, 26 Oct 2013 16:25:35 +0200, Jiri Olsa wrote:
> > > Adding call-graph option support into .perfconfig file,
> > > so it's now possible use call-graph option like:
> > >
> > >   [top]
> > >         call-graph = fp
> > >
> > >   [record]
> > >         call-graph = dwarf,8192
> > >
> > > Above options ONLY setup the unwind method. To enable
> > > perf record/top to actually use it the command line
> > > option -g/-G must be specified.
> > >
> > > The --call-graph option overloads .perfconfig setup.
> > >
> > > Assuming above configuration:
> > >
> > >   $ perf record -g ls
> > >   - enables dwarf unwind with user stack size dump 8192 bytes
> > >
> > >   $ perf top -G
> > >   - enables frame pointer unwind
> > >
> > >   $ perf record --call-graph=fp ls
> > >   - enables frame pointer unwind
> > >
> > >   $ perf top --call-graph=dwarf,4096 ls
> > >   - enables dwarf unwind with user stack size dump 4096 bytes
> > >
> > [SNIP]
> > > --- a/tools/perf/builtin-record.c
> > > +++ b/tools/perf/builtin-record.c
> > > @@ -750,6 +750,8 @@ int record_parse_callchain_opt(const struct option *opt,
> > >  	struct perf_record_opts *opts = opt->value;
> > >  	int ret;
> > >  
> > > +	opts->call_graph_enabled = !unset;
> > > +
> > 
> > Why not just using symbol_conf.use_callchain?
> > 
> 
> right, this way we'd be in sync with top

hum, on the other hand using symbol_conf.use_callchain within
utils/evsel.c adds symbol object dependency to python code
which I'm not sure is something we want ;-)

I'll keep it as it is in v2

jirka

^ permalink raw reply	[flat|nested] 91+ messages in thread

* [tip:perf/core] perf tools: Compare hists comm by addresses
  2013-10-11  5:15 ` [PATCH 7/8] perf tools: Compare hists comm by addresses Namhyung Kim
@ 2013-11-04 20:19   ` tip-bot for Frederic Weisbecker
  0 siblings, 0 replies; 91+ messages in thread
From: tip-bot for Frederic Weisbecker @ 2013-11-04 20:19 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, eranian, acme, hpa, mingo, a.p.zijlstra, namhyung,
	jolsa, fweisbec, dsahern, tglx, mingo

Commit-ID:  fedd63d3cdc9004df43b02df5c874b8957992fe8
Gitweb:     http://git.kernel.org/tip/fedd63d3cdc9004df43b02df5c874b8957992fe8
Author:     Frederic Weisbecker <fweisbec@gmail.com>
AuthorDate: Wed, 11 Sep 2013 17:18:09 +0200
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Mon, 4 Nov 2013 12:14:59 -0300

perf tools: Compare hists comm by addresses

Now that comm strings are allocated only once and refcounted to be shared
among threads, these can now be safely compared by addresses. This
should remove most hists collapses on post processing.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1381468543-25334-8-git-send-email-namhyung@kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/sort.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 835e8bd..bf91d0e 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -80,7 +80,8 @@ struct sort_entry sort_thread = {
 static int64_t
 sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-	return right->thread->tid - left->thread->tid;
+	/* Compare the addr that should be unique among comm */
+	return thread__comm_str(right->thread) - thread__comm_str(left->thread);
 }
 
 static int64_t

^ permalink raw reply related	[flat|nested] 91+ messages in thread

end of thread, other threads:[~2013-11-04 20:20 UTC | newest]

Thread overview: 91+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-10-11  5:15 [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Namhyung Kim
2013-10-11  5:15 ` [PATCH 1/8] perf callchain: Convert children list to rbtree Namhyung Kim
2013-10-23  7:54   ` [tip:perf/core] " tip-bot for Namhyung Kim
2013-10-23 11:07     ` Frederic Weisbecker
2013-10-23 12:45       ` Arnaldo Carvalho de Melo
2013-10-11  5:15 ` [PATCH 2/8] perf ui/progress: Add new helper functions for progress bar Namhyung Kim
2013-10-21 18:13   ` Arnaldo Carvalho de Melo
2013-10-22 18:12     ` Namhyung Kim
2013-10-22 13:06       ` Arnaldo Carvalho de Melo
2013-10-11  5:15 ` [PATCH 3/8] perf tools: Show progress on histogram collapsing Namhyung Kim
2013-10-25 10:33   ` [tip:perf/core] " tip-bot for Namhyung Kim
2013-10-28 15:03   ` [tip:perf/urgent] perf script python: Fix mem leak due to missing Py_DECREFs on dict entries tip-bot for Joseph Schuchart
2013-10-11  5:15 ` [PATCH 4/8] perf tools: Use an accessor to read thread comm Namhyung Kim
2013-10-11  5:15 ` [PATCH 5/8] perf tools: Add time argument on comm setting Namhyung Kim
2013-10-11  5:15 ` [PATCH 6/8] perf tools: Add new comm infrastructure Namhyung Kim
2013-10-25 10:56   ` Frederic Weisbecker
2013-10-25 13:04     ` Arnaldo Carvalho de Melo
2013-10-25 15:33       ` David Ahern
2013-10-25 18:12         ` Frederic Weisbecker
2013-10-25 18:14           ` Arnaldo Carvalho de Melo
2013-10-25 18:19           ` David Ahern
2013-10-28  5:38             ` Namhyung Kim
2013-10-28  9:09               ` Frederic Weisbecker
2013-10-28  9:15                 ` Namhyung Kim
2013-10-28 10:12                   ` Frederic Weisbecker
2013-10-28 12:43                     ` Arnaldo Carvalho de Melo
2013-10-28 14:29                       ` Arnaldo Carvalho de Melo
2013-10-28 16:05                         ` Frederic Weisbecker
2013-10-28 17:01                           ` Arnaldo Carvalho de Melo
2013-10-28 17:48                             ` Arnaldo Carvalho de Melo
2013-10-29  9:20                               ` Frederic Weisbecker
2013-10-29 13:06                                 ` Arnaldo Carvalho de Melo
2013-10-11  5:15 ` [PATCH 7/8] perf tools: Compare hists comm by addresses Namhyung Kim
2013-11-04 20:19   ` [tip:perf/core] " tip-bot for Frederic Weisbecker
2013-10-11  5:15 ` [PATCH 8/8] perf tools: Get current comm instead of last one Namhyung Kim
2013-10-11  5:58 ` [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Ingo Molnar
2013-10-11  7:34   ` Jiri Olsa
2013-10-11  8:24     ` Namhyung Kim
2013-10-11 12:59       ` Ingo Molnar
2013-10-11 13:04         ` Peter Zijlstra
2013-10-11 15:11     ` David Ahern
2013-10-11 15:20       ` David Ahern
2013-10-11 21:51         ` Andi Kleen
2013-10-11 22:04           ` David Ahern
2013-10-13 10:25             ` Jiri Olsa
2013-10-13 21:18               ` [RFC] perf record,top: Add callchain option into .perfconfig Jiri Olsa
2013-10-13 21:32                 ` Andi Kleen
2013-10-14  7:56               ` [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Ingo Molnar
2013-10-12 16:53         ` Ingo Molnar
2013-10-12 19:42           ` David Ahern
2013-10-13  5:23             ` Ingo Molnar
2013-10-25 19:09               ` RFP: Fixing "-ga -ag -g fp -g dwarf" was " Arnaldo Carvalho de Melo
2013-10-25 19:22                 ` David Ahern
2013-10-25 19:46                   ` Arnaldo Carvalho de Melo
2013-10-26 12:03                     ` Ingo Molnar
2013-10-26 12:35                       ` Jiri Olsa
2013-10-26 14:25                       ` [PATCH 0/4] perf tools: Fix -g option handling Jiri Olsa
2013-10-26 14:25                         ` [PATCH 1/4] perf tools: Split -g and --call-graph for record command Jiri Olsa
2013-10-27 15:30                           ` David Ahern
2013-10-28 17:46                             ` Arnaldo Carvalho de Melo
2013-10-28 18:20                               ` David Ahern
2013-10-29  5:13                                 ` Namhyung Kim
2013-10-28  7:59                           ` Namhyung Kim
2013-10-29 10:18                             ` Jiri Olsa
2013-10-29 12:42                               ` Arnaldo Carvalho de Melo
2013-10-29  8:22                           ` [tip:perf/urgent] perf record: Split -g and --call-graph tip-bot for Jiri Olsa
2013-10-26 14:25                         ` [PATCH 2/4] perf tools: Split -G and --call-graph for top command Jiri Olsa
2013-10-27 15:34                           ` David Ahern
2013-10-28  8:06                             ` Namhyung Kim
2013-10-29  8:22                           ` [tip:perf/urgent] perf top: Split -G and --call-graph tip-bot for Jiri Olsa
2013-10-26 14:25                         ` [PATCH 3/4] perf tools: Add call-graph option support into .perfconfig Jiri Olsa
2013-10-27 15:36                           ` David Ahern
2013-10-28  8:10                           ` Namhyung Kim
2013-10-29 10:18                             ` Jiri Olsa
2013-10-29 12:43                               ` Arnaldo Carvalho de Melo
2013-10-29 12:46                                 ` Ingo Molnar
2013-11-01 15:20                               ` Jiri Olsa
2013-10-26 14:25                         ` [PATCH 4/4] perf tools: Add readable output for callchain debug Jiri Olsa
2013-10-27 15:39                           ` David Ahern
2013-10-26 14:32                         ` [PATCH 0/4] perf tools: Fix -g option handling Jiri Olsa
2013-10-27  6:56                           ` Ingo Molnar
2013-10-29 10:21                             ` Jiri Olsa
2013-10-29 10:25                               ` Ingo Molnar
2013-10-13 12:34 ` [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Jiri Olsa
2013-10-14  1:06   ` Namhyung Kim
2013-10-14  4:50   ` Namhyung Kim
2013-10-14  8:01     ` Jiri Olsa
2013-10-14  8:41       ` Namhyung Kim
  -- strict thread matches above, loose matches on Subject: below --
2013-09-26  8:58 [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v4) Namhyung Kim
2013-09-26  8:58 ` [PATCH 8/8] perf tools: Get current comm instead of last one Namhyung Kim
2013-10-02 10:01   ` Frederic Weisbecker
2013-10-08  1:56     ` Namhyung Kim

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).