[PATCH] perf report: auto-detect branch stack sampling mode

linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH] perf report: auto-detect branch stack sampling mode
@ 2012-02-24  9:40 Stephane Eranian
  2012-02-24 15:24 ` David Ahern
  2012-03-05 15:47 ` Ingo Molnar
  0 siblings, 2 replies; 37+ messages in thread
From: Stephane Eranian @ 2012-02-24  9:40 UTC (permalink / raw)
  To: linux-kernel
  Cc: acme, peterz, mingo, dsahern, ravitillo, khandual, asharma,
	robert.richter, ming.m.lin, vweaver1, andi


This patch adds auto-detection of samples with taken branch stacks.
The auto-detection avoids having to specify the -b or --branch-stack
option on the cmdline.

The patch adds a new feature bit HEADER_BRANCH_STACK to mark the
presence of branch stacks in samples.

You can now do:
$ perf record -b any noploop 2
$ perf report 
# Events: 8K cycles
#
# Overhead  Command  Source Shared Object        Source Symbol  Target Shared Object       Target Symbol
# ........  .......  ....................  ...................  ....................  ..................
#
    91.56%  noploop  noploop               [.] noploop                       noploop  [.] noploop
     0.42%  noploop  [kernel.kallsyms]     [k] __lock_acquire      [kernel.kallsyms]  [k] __lock_acquire


To force regular reporting based on the instruction address:
$ perf report --no-branch-stack
#
# Events: 2K cycles
#
# Overhead  Command      Shared Object                           Symbol
# ........  .......  .................  ...............................
#
    92.03%  noploop  noploop            [.] noploop                    
     1.00%  noploop  [kernel.kallsyms]  [k] lock_acquire               


Signed-off-by: Stephane Eranian <eranian@google.com>
---

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 1c49d4e..5e833a2 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
 	if (!have_tracepoints(&evsel_list->entries))
 		perf_header__clear_feat(&session->header, HEADER_TRACE_INFO);
 
+	if (!rec->opts.branch_stack)
+		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
+
 	if (!rec->file_new) {
 		err = perf_session__read_header(session, output);
 		if (err < 0)
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 528789f..edd4289 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -306,21 +306,14 @@ static int __cmd_report(struct perf_report *rep)
 {
 	int ret = -EINVAL;
 	u64 nr_samples;
-	struct perf_session *session;
 	struct perf_evsel *pos;
+	struct perf_session *session = rep->session;
 	struct map *kernel_map;
 	struct kmap *kernel_kmap;
 	const char *help = "For a higher level overview, try: perf report --sort comm,dso";
 
 	signal(SIGINT, sig_handler);
 
-	session = perf_session__new(rep->input_name, O_RDONLY,
-				    rep->force, false, &rep->tool);
-	if (session == NULL)
-		return -ENOMEM;
-
-	rep->session = session;
-
 	if (rep->cpu_list) {
 		ret = perf_session__cpu_bitmap(session, rep->cpu_list,
 					       rep->cpu_bitmap);
@@ -489,7 +482,10 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 
 int cmd_report(int argc, const char **argv, const char *prefix __used)
 {
+	struct perf_session *session;
 	struct stat st;
+	bool has_br_stack;
+	int ret = -1;
 	char callchain_default_opt[] = "fractal,0.5,callee";
 	const char * const report_usage[] = {
 		"perf report [<options>]",
@@ -600,7 +596,23 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
 			report.input_name = "perf.data";
 	}
 
-	if (sort__branch_mode) {
+	session = perf_session__new(report.input_name, O_RDONLY,
+				    report.force, false, &report.tool);
+	if (session == NULL)
+		return -ENOMEM;
+
+	report.session = session;
+
+	has_br_stack = perf_header__has_feat(&session->header,
+					     HEADER_BRANCH_STACK);
+
+	/*
+	 * if branch mode set by user via -b or --branch-stack
+	 * or not forced off by user (-no-branch-stack) user and present
+	 * in the file then we set branch mode
+	 */
+	if (sort__branch_mode || (sort__branch_mode == -1 && has_br_stack)) {
+		sort__branch_mode = true;
 		if (use_browser)
 			fprintf(stderr, "Warning: TUI interface not supported"
 					" in branch mode\n");
@@ -657,13 +669,13 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
 	}
 
 	if (symbol__init() < 0)
-		return -1;
+		goto error;
 
 	setup_sorting(report_usage, options);
 
 	if (parent_pattern != default_parent_pattern) {
 		if (sort_dimension__add("parent") < 0)
-			return -1;
+			goto error;
 
 		/*
 		 * Only show the parent fields if we explicitly
@@ -685,5 +697,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
 	sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
 	sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
 
-	return __cmd_report(&report);
+	ret = __cmd_report(&report);
+error:
+	perf_session__delete(session);
+	return ret;
 }
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index c851495..c22491e 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1023,6 +1023,12 @@ static int write_cpuid(int fd, struct perf_header *h __used,
 	return do_write_string(fd, buffer);
 }
 
+static int write_branch_stack(int fd __used, struct perf_header *h __used,
+		       struct perf_evlist *evlist __used)
+{
+	return 0;
+}
+
 static void print_hostname(struct perf_header *ph, int fd, FILE *fp)
 {
 	char *str = do_read_string(fd, ph);
@@ -1315,6 +1321,12 @@ static void print_cpuid(struct perf_header *ph, int fd, FILE *fp)
 	free(str);
 }
 
+static void print_branch_stack(struct perf_header *ph __used, int fd __used,
+			       FILE *fp)
+{
+	fprintf(fp, "# contains samples with branch stacks\n");
+}
+
 static int __event_process_build_id(struct build_id_event *bev,
 				    char *filename,
 				    struct perf_session *session)
@@ -1519,6 +1531,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
 	FEAT_OPA(HEADER_CMDLINE,	cmdline),
 	FEAT_OPF(HEADER_CPU_TOPOLOGY,	cpu_topology),
 	FEAT_OPF(HEADER_NUMA_TOPOLOGY,	numa_topology),
+	FEAT_OPA(HEADER_BRANCH_STACK,	branch_stack),
 };
 
 struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index e68f617..21a6be0 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -27,7 +27,7 @@ enum {
 	HEADER_EVENT_DESC,
 	HEADER_CPU_TOPOLOGY,
 	HEADER_NUMA_TOPOLOGY,
-
+	HEADER_BRANCH_STACK,
 	HEADER_LAST_FEATURE,
 	HEADER_FEAT_BITS	= 256,
 };
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 2739ed1..69d50c0 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -8,7 +8,7 @@ const char	default_sort_order[] = "comm,dso,symbol";
 const char	*sort_order = default_sort_order;
 int		sort__need_collapse = 0;
 int		sort__has_parent = 0;
-bool		sort__branch_mode;
+bool		sort__branch_mode = -1; /* -1 = means not set */
 
 enum sort_type	sort__first_dimension;
 

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-02-24  9:40 [PATCH] perf report: auto-detect branch stack sampling mode Stephane Eranian
@ 2012-02-24 15:24 ` David Ahern
  2012-02-24 15:28   ` Stephane Eranian
  2012-03-05 15:47 ` Ingo Molnar
  1 sibling, 1 reply; 37+ messages in thread
From: David Ahern @ 2012-02-24 15:24 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: linux-kernel, acme, peterz, mingo, ravitillo, khandual, asharma,
	robert.richter, ming.m.lin, vweaver1, andi

On 2/24/12 2:40 AM, Stephane Eranian wrote:
>
> This patch adds auto-detection of samples with taken branch stacks.
> The auto-detection avoids having to specify the -b or --branch-stack
> option on the cmdline.
>
> The patch adds a new feature bit HEADER_BRANCH_STACK to mark the
> presence of branch stacks in samples.
>
> You can now do:
> $ perf record -b any noploop 2
> $ perf report
> # Events: 8K cycles
> #
> # Overhead  Command  Source Shared Object        Source Symbol  Target Shared Object       Target Symbol
> # ........  .......  ....................  ...................  ....................  ..................
> #
>      91.56%  noploop  noploop               [.] noploop                       noploop  [.] noploop
>       0.42%  noploop  [kernel.kallsyms]     [k] __lock_acquire      [kernel.kallsyms]  [k] __lock_acquire
>
>
> To force regular reporting based on the instruction address:
> $ perf report --no-branch-stack
> #
> # Events: 2K cycles
> #
> # Overhead  Command      Shared Object                           Symbol
> # ........  .......  .................  ...............................
> #
>      92.03%  noploop  noploop            [.] noploop
>       1.00%  noploop  [kernel.kallsyms]  [k] lock_acquire
>
>
> Signed-off-by: Stephane Eranian<eranian@google.com>
> ---
>
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 1c49d4e..5e833a2 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
>   	if (!have_tracepoints(&evsel_list->entries))
>   		perf_header__clear_feat(&session->header, HEADER_TRACE_INFO);
>
> +	if (!rec->opts.branch_stack)
> +		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

branch tracing is user requested on, so shouldn't feature default off 
and only be enabled when requested?

David


> +
>   	if (!rec->file_new) {
>   		err = perf_session__read_header(session, output);
>   		if (err<  0)
> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> index 528789f..edd4289 100644
> --- a/tools/perf/builtin-report.c
> +++ b/tools/perf/builtin-report.c
> @@ -306,21 +306,14 @@ static int __cmd_report(struct perf_report *rep)
>   {
>   	int ret = -EINVAL;
>   	u64 nr_samples;
> -	struct perf_session *session;
>   	struct perf_evsel *pos;
> +	struct perf_session *session = rep->session;
>   	struct map *kernel_map;
>   	struct kmap *kernel_kmap;
>   	const char *help = "For a higher level overview, try: perf report --sort comm,dso";
>
>   	signal(SIGINT, sig_handler);
>
> -	session = perf_session__new(rep->input_name, O_RDONLY,
> -				    rep->force, false,&rep->tool);
> -	if (session == NULL)
> -		return -ENOMEM;
> -
> -	rep->session = session;
> -
>   	if (rep->cpu_list) {
>   		ret = perf_session__cpu_bitmap(session, rep->cpu_list,
>   					       rep->cpu_bitmap);
> @@ -489,7 +482,10 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
>
>   int cmd_report(int argc, const char **argv, const char *prefix __used)
>   {
> +	struct perf_session *session;
>   	struct stat st;
> +	bool has_br_stack;
> +	int ret = -1;
>   	char callchain_default_opt[] = "fractal,0.5,callee";
>   	const char * const report_usage[] = {
>   		"perf report [<options>]",
> @@ -600,7 +596,23 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
>   			report.input_name = "perf.data";
>   	}
>
> -	if (sort__branch_mode) {
> +	session = perf_session__new(report.input_name, O_RDONLY,
> +				    report.force, false,&report.tool);
> +	if (session == NULL)
> +		return -ENOMEM;
> +
> +	report.session = session;
> +
> +	has_br_stack = perf_header__has_feat(&session->header,
> +					     HEADER_BRANCH_STACK);
> +
> +	/*
> +	 * if branch mode set by user via -b or --branch-stack
> +	 * or not forced off by user (-no-branch-stack) user and present
> +	 * in the file then we set branch mode
> +	 */
> +	if (sort__branch_mode || (sort__branch_mode == -1&&  has_br_stack)) {
> +		sort__branch_mode = true;
>   		if (use_browser)
>   			fprintf(stderr, "Warning: TUI interface not supported"
>   					" in branch mode\n");
> @@ -657,13 +669,13 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
>   	}
>
>   	if (symbol__init()<  0)
> -		return -1;
> +		goto error;
>
>   	setup_sorting(report_usage, options);
>
>   	if (parent_pattern != default_parent_pattern) {
>   		if (sort_dimension__add("parent")<  0)
> -			return -1;
> +			goto error;
>
>   		/*
>   		 * Only show the parent fields if we explicitly
> @@ -685,5 +697,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
>   	sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
>   	sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
>
> -	return __cmd_report(&report);
> +	ret = __cmd_report(&report);
> +error:
> +	perf_session__delete(session);
> +	return ret;
>   }
> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> index c851495..c22491e 100644
> --- a/tools/perf/util/header.c
> +++ b/tools/perf/util/header.c
> @@ -1023,6 +1023,12 @@ static int write_cpuid(int fd, struct perf_header *h __used,
>   	return do_write_string(fd, buffer);
>   }
>
> +static int write_branch_stack(int fd __used, struct perf_header *h __used,
> +		       struct perf_evlist *evlist __used)
> +{
> +	return 0;
> +}
> +
>   static void print_hostname(struct perf_header *ph, int fd, FILE *fp)
>   {
>   	char *str = do_read_string(fd, ph);
> @@ -1315,6 +1321,12 @@ static void print_cpuid(struct perf_header *ph, int fd, FILE *fp)
>   	free(str);
>   }
>
> +static void print_branch_stack(struct perf_header *ph __used, int fd __used,
> +			       FILE *fp)
> +{
> +	fprintf(fp, "# contains samples with branch stacks\n");
> +}
> +
>   static int __event_process_build_id(struct build_id_event *bev,
>   				    char *filename,
>   				    struct perf_session *session)
> @@ -1519,6 +1531,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
>   	FEAT_OPA(HEADER_CMDLINE,	cmdline),
>   	FEAT_OPF(HEADER_CPU_TOPOLOGY,	cpu_topology),
>   	FEAT_OPF(HEADER_NUMA_TOPOLOGY,	numa_topology),
> +	FEAT_OPA(HEADER_BRANCH_STACK,	branch_stack),
>   };
>
>   struct header_print_data {
> diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
> index e68f617..21a6be0 100644
> --- a/tools/perf/util/header.h
> +++ b/tools/perf/util/header.h
> @@ -27,7 +27,7 @@ enum {
>   	HEADER_EVENT_DESC,
>   	HEADER_CPU_TOPOLOGY,
>   	HEADER_NUMA_TOPOLOGY,
> -
> +	HEADER_BRANCH_STACK,
>   	HEADER_LAST_FEATURE,
>   	HEADER_FEAT_BITS	= 256,
>   };
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index 2739ed1..69d50c0 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -8,7 +8,7 @@ const char	default_sort_order[] = "comm,dso,symbol";
>   const char	*sort_order = default_sort_order;
>   int		sort__need_collapse = 0;
>   int		sort__has_parent = 0;
> -bool		sort__branch_mode;
> +bool		sort__branch_mode = -1; /* -1 = means not set */
>
>   enum sort_type	sort__first_dimension;
>


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-02-24 15:24 ` David Ahern
@ 2012-02-24 15:28   ` Stephane Eranian
  2012-02-24 15:31     ` David Ahern
  0 siblings, 1 reply; 37+ messages in thread
From: Stephane Eranian @ 2012-02-24 15:28 UTC (permalink / raw)
  To: David Ahern
  Cc: linux-kernel, acme, peterz, mingo, ravitillo, khandual, asharma,
	robert.richter, ming.m.lin, vweaver1, andi

On Fri, Feb 24, 2012 at 4:24 PM, David Ahern <dsahern@gmail.com> wrote:
> On 2/24/12 2:40 AM, Stephane Eranian wrote:
>>
>>
>> This patch adds auto-detection of samples with taken branch stacks.
>> The auto-detection avoids having to specify the -b or --branch-stack
>> option on the cmdline.
>>
>> The patch adds a new feature bit HEADER_BRANCH_STACK to mark the
>> presence of branch stacks in samples.
>>
>> You can now do:
>> $ perf record -b any noploop 2
>> $ perf report
>> # Events: 8K cycles
>> #
>> # Overhead  Command  Source Shared Object        Source Symbol  Target
>> Shared Object       Target Symbol
>> # ........  .......  ....................  ...................
>>  ....................  ..................
>> #
>>     91.56%  noploop  noploop               [.] noploop
>>   noploop  [.] noploop
>>      0.42%  noploop  [kernel.kallsyms]     [k] __lock_acquire
>>  [kernel.kallsyms]  [k] __lock_acquire
>>
>>
>> To force regular reporting based on the instruction address:
>> $ perf report --no-branch-stack
>> #
>> # Events: 2K cycles
>> #
>> # Overhead  Command      Shared Object                           Symbol
>> # ........  .......  .................  ...............................
>> #
>>     92.03%  noploop  noploop            [.] noploop
>>      1.00%  noploop  [kernel.kallsyms]  [k] lock_acquire
>>
>>
>> Signed-off-by: Stephane Eranian<eranian@google.com>
>> ---
>>
>> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
>> index 1c49d4e..5e833a2 100644
>> --- a/tools/perf/builtin-record.c
>> +++ b/tools/perf/builtin-record.c
>> @@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int
>> argc, const char **argv)
>>        if (!have_tracepoints(&evsel_list->entries))
>>                perf_header__clear_feat(&session->header,
>> HEADER_TRACE_INFO);
>>
>> +       if (!rec->opts.branch_stack)
>> +               perf_header__clear_feat(&session->header,
>> HEADER_BRANCH_STACK);
>
>
> branch tracing is user requested on, so shouldn't feature default off and
> only be enabled when requested?
>
Well, what Ingo was suggesting is that perf report auto-detects whether or
not branch mode is necessary by looking at the perf.data file. Most likely
if you've recorded with -b, you are interested in a branch mode view rather
that the instruction view (default). So all this does is elimintate the need
to pass -b to perf report to enable branch mode.

> David
>
>
>> +
>>        if (!rec->file_new) {
>>                err = perf_session__read_header(session, output);
>>                if (err<  0)
>> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
>> index 528789f..edd4289 100644
>> --- a/tools/perf/builtin-report.c
>> +++ b/tools/perf/builtin-report.c
>> @@ -306,21 +306,14 @@ static int __cmd_report(struct perf_report *rep)
>>  {
>>        int ret = -EINVAL;
>>        u64 nr_samples;
>> -       struct perf_session *session;
>>        struct perf_evsel *pos;
>> +       struct perf_session *session = rep->session;
>>        struct map *kernel_map;
>>        struct kmap *kernel_kmap;
>>        const char *help = "For a higher level overview, try: perf report
>> --sort comm,dso";
>>
>>        signal(SIGINT, sig_handler);
>>
>> -       session = perf_session__new(rep->input_name, O_RDONLY,
>> -                                   rep->force, false,&rep->tool);
>>
>> -       if (session == NULL)
>> -               return -ENOMEM;
>> -
>> -       rep->session = session;
>> -
>>        if (rep->cpu_list) {
>>                ret = perf_session__cpu_bitmap(session, rep->cpu_list,
>>                                               rep->cpu_bitmap);
>> @@ -489,7 +482,10 @@ parse_callchain_opt(const struct option *opt, const
>> char *arg, int unset)
>>
>>  int cmd_report(int argc, const char **argv, const char *prefix __used)
>>  {
>> +       struct perf_session *session;
>>        struct stat st;
>> +       bool has_br_stack;
>> +       int ret = -1;
>>        char callchain_default_opt[] = "fractal,0.5,callee";
>>        const char * const report_usage[] = {
>>                "perf report [<options>]",
>> @@ -600,7 +596,23 @@ int cmd_report(int argc, const char **argv, const
>> char *prefix __used)
>>                        report.input_name = "perf.data";
>>        }
>>
>> -       if (sort__branch_mode) {
>> +       session = perf_session__new(report.input_name, O_RDONLY,
>> +                                   report.force, false,&report.tool);
>> +       if (session == NULL)
>> +               return -ENOMEM;
>> +
>> +       report.session = session;
>> +
>> +       has_br_stack = perf_header__has_feat(&session->header,
>> +                                            HEADER_BRANCH_STACK);
>> +
>> +       /*
>> +        * if branch mode set by user via -b or --branch-stack
>> +        * or not forced off by user (-no-branch-stack) user and present
>> +        * in the file then we set branch mode
>> +        */
>> +       if (sort__branch_mode || (sort__branch_mode == -1&&
>>  has_br_stack)) {
>>
>> +               sort__branch_mode = true;
>>                if (use_browser)
>>                        fprintf(stderr, "Warning: TUI interface not
>> supported"
>>                                        " in branch mode\n");
>> @@ -657,13 +669,13 @@ int cmd_report(int argc, const char **argv, const
>> char *prefix __used)
>>        }
>>
>>        if (symbol__init()<  0)
>> -               return -1;
>> +               goto error;
>>
>>        setup_sorting(report_usage, options);
>>
>>        if (parent_pattern != default_parent_pattern) {
>>                if (sort_dimension__add("parent")<  0)
>> -                       return -1;
>> +                       goto error;
>>
>>                /*
>>                 * Only show the parent fields if we explicitly
>> @@ -685,5 +697,8 @@ int cmd_report(int argc, const char **argv, const char
>> *prefix __used)
>>        sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm",
>> stdout);
>>        sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol",
>> stdout);
>>
>> -       return __cmd_report(&report);
>> +       ret = __cmd_report(&report);
>> +error:
>> +       perf_session__delete(session);
>> +       return ret;
>>  }
>> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
>> index c851495..c22491e 100644
>> --- a/tools/perf/util/header.c
>> +++ b/tools/perf/util/header.c
>> @@ -1023,6 +1023,12 @@ static int write_cpuid(int fd, struct perf_header
>> *h __used,
>>        return do_write_string(fd, buffer);
>>  }
>>
>> +static int write_branch_stack(int fd __used, struct perf_header *h
>> __used,
>> +                      struct perf_evlist *evlist __used)
>> +{
>> +       return 0;
>> +}
>> +
>>  static void print_hostname(struct perf_header *ph, int fd, FILE *fp)
>>  {
>>        char *str = do_read_string(fd, ph);
>> @@ -1315,6 +1321,12 @@ static void print_cpuid(struct perf_header *ph, int
>> fd, FILE *fp)
>>        free(str);
>>  }
>>
>> +static void print_branch_stack(struct perf_header *ph __used, int fd
>> __used,
>> +                              FILE *fp)
>> +{
>> +       fprintf(fp, "# contains samples with branch stacks\n");
>> +}
>> +
>>  static int __event_process_build_id(struct build_id_event *bev,
>>                                    char *filename,
>>                                    struct perf_session *session)
>> @@ -1519,6 +1531,7 @@ static const struct feature_ops
>> feat_ops[HEADER_LAST_FEATURE] = {
>>        FEAT_OPA(HEADER_CMDLINE,        cmdline),
>>        FEAT_OPF(HEADER_CPU_TOPOLOGY,   cpu_topology),
>>        FEAT_OPF(HEADER_NUMA_TOPOLOGY,  numa_topology),
>> +       FEAT_OPA(HEADER_BRANCH_STACK,   branch_stack),
>>  };
>>
>>  struct header_print_data {
>> diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
>> index e68f617..21a6be0 100644
>> --- a/tools/perf/util/header.h
>> +++ b/tools/perf/util/header.h
>> @@ -27,7 +27,7 @@ enum {
>>        HEADER_EVENT_DESC,
>>        HEADER_CPU_TOPOLOGY,
>>        HEADER_NUMA_TOPOLOGY,
>> -
>> +       HEADER_BRANCH_STACK,
>>        HEADER_LAST_FEATURE,
>>        HEADER_FEAT_BITS        = 256,
>>  };
>> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
>> index 2739ed1..69d50c0 100644
>> --- a/tools/perf/util/sort.c
>> +++ b/tools/perf/util/sort.c
>> @@ -8,7 +8,7 @@ const char      default_sort_order[] = "comm,dso,symbol";
>>  const char    *sort_order = default_sort_order;
>>  int           sort__need_collapse = 0;
>>  int           sort__has_parent = 0;
>> -bool           sort__branch_mode;
>> +bool           sort__branch_mode = -1; /* -1 = means not set */
>>
>>  enum sort_type        sort__first_dimension;
>>
>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-02-24 15:28   ` Stephane Eranian
@ 2012-02-24 15:31     ` David Ahern
  2012-02-24 15:40       ` Stephane Eranian
  0 siblings, 1 reply; 37+ messages in thread
From: David Ahern @ 2012-02-24 15:31 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: linux-kernel, acme, peterz, mingo, ravitillo, khandual, asharma,
	robert.richter, ming.m.lin, vweaver1, andi

On 2/24/12 8:28 AM, Stephane Eranian wrote:
> On Fri, Feb 24, 2012 at 4:24 PM, David Ahern<dsahern@gmail.com>  wrote:
>> On 2/24/12 2:40 AM, Stephane Eranian wrote:
>>>
>>>
>>> This patch adds auto-detection of samples with taken branch stacks.
>>> The auto-detection avoids having to specify the -b or --branch-stack
>>> option on the cmdline.
>>>
>>> The patch adds a new feature bit HEADER_BRANCH_STACK to mark the
>>> presence of branch stacks in samples.
>>>
>>> You can now do:
>>> $ perf record -b any noploop 2
>>> $ perf report
>>> # Events: 8K cycles
>>> #
>>> # Overhead  Command  Source Shared Object        Source Symbol  Target
>>> Shared Object       Target Symbol
>>> # ........  .......  ....................  ...................
>>>   ....................  ..................
>>> #
>>>      91.56%  noploop  noploop               [.] noploop
>>>    noploop  [.] noploop
>>>       0.42%  noploop  [kernel.kallsyms]     [k] __lock_acquire
>>>   [kernel.kallsyms]  [k] __lock_acquire
>>>
>>>
>>> To force regular reporting based on the instruction address:
>>> $ perf report --no-branch-stack
>>> #
>>> # Events: 2K cycles
>>> #
>>> # Overhead  Command      Shared Object                           Symbol
>>> # ........  .......  .................  ...............................
>>> #
>>>      92.03%  noploop  noploop            [.] noploop
>>>       1.00%  noploop  [kernel.kallsyms]  [k] lock_acquire
>>>
>>>
>>> Signed-off-by: Stephane Eranian<eranian@google.com>
>>> ---
>>>
>>> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
>>> index 1c49d4e..5e833a2 100644
>>> --- a/tools/perf/builtin-record.c
>>> +++ b/tools/perf/builtin-record.c
>>> @@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int
>>> argc, const char **argv)
>>>         if (!have_tracepoints(&evsel_list->entries))
>>>                 perf_header__clear_feat(&session->header,
>>> HEADER_TRACE_INFO);
>>>
>>> +       if (!rec->opts.branch_stack)
>>> +               perf_header__clear_feat(&session->header,
>>> HEADER_BRANCH_STACK);
>>
>>
>> branch tracing is user requested on, so shouldn't feature default off and
>> only be enabled when requested?
>>
> Well, what Ingo was suggesting is that perf report auto-detects whether or
> not branch mode is necessary by looking at the perf.data file. Most likely
> if you've recorded with -b, you are interested in a branch mode view rather
> that the instruction view (default). So all this does is elimintate the need
> to pass -b to perf report to enable branch mode.

Right. My point is that HEADER_BRANCH_STACK feature should be off by 
default and only enabled by builtin-record when branch stack is 
requested. You have the reverse -- enabled by default and off in record 
if not requested.

David


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-02-24 15:31     ` David Ahern
@ 2012-02-24 15:40       ` Stephane Eranian
  2012-02-24 15:49         ` David Ahern
  0 siblings, 1 reply; 37+ messages in thread
From: Stephane Eranian @ 2012-02-24 15:40 UTC (permalink / raw)
  To: David Ahern
  Cc: linux-kernel, acme, peterz, mingo, ravitillo, khandual, asharma,
	robert.richter, ming.m.lin, vweaver1, andi

On Fri, Feb 24, 2012 at 4:31 PM, David Ahern <dsahern@gmail.com> wrote:
> On 2/24/12 8:28 AM, Stephane Eranian wrote:
>>
>> On Fri, Feb 24, 2012 at 4:24 PM, David Ahern<dsahern@gmail.com>  wrote:
>>>
>>> On 2/24/12 2:40 AM, Stephane Eranian wrote:
>>>>
>>>>
>>>>
>>>> This patch adds auto-detection of samples with taken branch stacks.
>>>> The auto-detection avoids having to specify the -b or --branch-stack
>>>> option on the cmdline.
>>>>
>>>> The patch adds a new feature bit HEADER_BRANCH_STACK to mark the
>>>> presence of branch stacks in samples.
>>>>
>>>> You can now do:
>>>> $ perf record -b any noploop 2
>>>> $ perf report
>>>> # Events: 8K cycles
>>>> #
>>>> # Overhead  Command  Source Shared Object        Source Symbol  Target
>>>> Shared Object       Target Symbol
>>>> # ........  .......  ....................  ...................
>>>>  ....................  ..................
>>>> #
>>>>     91.56%  noploop  noploop               [.] noploop
>>>>   noploop  [.] noploop
>>>>      0.42%  noploop  [kernel.kallsyms]     [k] __lock_acquire
>>>>  [kernel.kallsyms]  [k] __lock_acquire
>>>>
>>>>
>>>> To force regular reporting based on the instruction address:
>>>> $ perf report --no-branch-stack
>>>> #
>>>> # Events: 2K cycles
>>>> #
>>>> # Overhead  Command      Shared Object                           Symbol
>>>> # ........  .......  .................  ...............................
>>>> #
>>>>     92.03%  noploop  noploop            [.] noploop
>>>>      1.00%  noploop  [kernel.kallsyms]  [k] lock_acquire
>>>>
>>>>
>>>> Signed-off-by: Stephane Eranian<eranian@google.com>
>>>> ---
>>>>
>>>> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
>>>> index 1c49d4e..5e833a2 100644
>>>> --- a/tools/perf/builtin-record.c
>>>> +++ b/tools/perf/builtin-record.c
>>>> @@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int
>>>> argc, const char **argv)
>>>>        if (!have_tracepoints(&evsel_list->entries))
>>>>                perf_header__clear_feat(&session->header,
>>>> HEADER_TRACE_INFO);
>>>>
>>>> +       if (!rec->opts.branch_stack)
>>>> +               perf_header__clear_feat(&session->header,
>>>> HEADER_BRANCH_STACK);
>>>
>>>
>>>
>>> branch tracing is user requested on, so shouldn't feature default off and
>>> only be enabled when requested?
>>>
>> Well, what Ingo was suggesting is that perf report auto-detects whether or
>> not branch mode is necessary by looking at the perf.data file. Most likely
>> if you've recorded with -b, you are interested in a branch mode view
>> rather
>> that the instruction view (default). So all this does is elimintate the
>> need
>> to pass -b to perf report to enable branch mode.
>
>
> Right. My point is that HEADER_BRANCH_STACK feature should be off by default
> and only enabled by builtin-record when branch stack is requested. You have
> the reverse -- enabled by default and off in record if not requested.
>
No, I don't. Read the code carefully. The for loop sets all known feature bits.
Then, the ones not necessary or unused are turned off individually.

> David
>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-02-24 15:40       ` Stephane Eranian
@ 2012-02-24 15:49         ` David Ahern
  2012-02-24 15:51           ` Stephane Eranian
  0 siblings, 1 reply; 37+ messages in thread
From: David Ahern @ 2012-02-24 15:49 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: linux-kernel, acme, peterz, mingo, ravitillo, khandual, asharma,
	robert.richter, ming.m.lin, vweaver1, andi

On 2/24/12 8:40 AM, Stephane Eranian wrote:
> No, I don't. Read the code carefully. The for loop sets all known feature bits.
> Then, the ones not necessary or unused are turned off individually.

Ok, I see now. The __set_feat loop is earlier in builtin-record.c, not 
the util code.

David

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-02-24 15:49         ` David Ahern
@ 2012-02-24 15:51           ` Stephane Eranian
  2012-03-02 17:47             ` Stephane Eranian
  0 siblings, 1 reply; 37+ messages in thread
From: Stephane Eranian @ 2012-02-24 15:51 UTC (permalink / raw)
  To: David Ahern
  Cc: linux-kernel, acme, peterz, mingo, ravitillo, khandual, asharma,
	robert.richter, ming.m.lin, vweaver1, andi

On Fri, Feb 24, 2012 at 4:49 PM, David Ahern <dsahern@gmail.com> wrote:
> On 2/24/12 8:40 AM, Stephane Eranian wrote:
>>
>> No, I don't. Read the code carefully. The for loop sets all known feature
>> bits.
>> Then, the ones not necessary or unused are turned off individually.
>
>
> Ok, I see now. The __set_feat loop is earlier in builtin-record.c, not the
> util code.
>
Yeah. Arnaldo simplified the original code in builtin-record.c to have the loop
instead of individual set_feat().

> David

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-02-24 15:51           ` Stephane Eranian
@ 2012-03-02 17:47             ` Stephane Eranian
  2012-03-02 19:08               ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 37+ messages in thread
From: Stephane Eranian @ 2012-03-02 17:47 UTC (permalink / raw)
  To: David Ahern
  Cc: linux-kernel, acme, peterz, mingo, ravitillo, khandual, asharma,
	robert.richter, ming.m.lin, vweaver1, andi

Arnaldo,

What do you think about this patch?


On Fri, Feb 24, 2012 at 4:51 PM, Stephane Eranian <eranian@google.com> wrote:
> On Fri, Feb 24, 2012 at 4:49 PM, David Ahern <dsahern@gmail.com> wrote:
>> On 2/24/12 8:40 AM, Stephane Eranian wrote:
>>>
>>> No, I don't. Read the code carefully. The for loop sets all known feature
>>> bits.
>>> Then, the ones not necessary or unused are turned off individually.
>>
>>
>> Ok, I see now. The __set_feat loop is earlier in builtin-record.c, not the
>> util code.
>>
> Yeah. Arnaldo simplified the original code in builtin-record.c to have the loop
> instead of individual set_feat().
>
>> David

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-03-02 17:47             ` Stephane Eranian
@ 2012-03-02 19:08               ` Arnaldo Carvalho de Melo
  2012-03-03 19:43                 ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 37+ messages in thread
From: Arnaldo Carvalho de Melo @ 2012-03-02 19:08 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: David Ahern, linux-kernel, peterz, mingo, ravitillo, khandual,
	asharma, robert.richter, ming.m.lin, vweaver1, andi

Em Fri, Mar 02, 2012 at 06:47:38PM +0100, Stephane Eranian escreveu:
> Arnaldo,
> 
> What do you think about this patch?

I'm processing perf/core patches today, will get to this one and merge
or let you know objections,

- Arnaldo
 
> 
> On Fri, Feb 24, 2012 at 4:51 PM, Stephane Eranian <eranian@google.com> wrote:
> > On Fri, Feb 24, 2012 at 4:49 PM, David Ahern <dsahern@gmail.com> wrote:
> >> On 2/24/12 8:40 AM, Stephane Eranian wrote:
> >>>
> >>> No, I don't. Read the code carefully. The for loop sets all known feature
> >>> bits.
> >>> Then, the ones not necessary or unused are turned off individually.
> >>
> >>
> >> Ok, I see now. The __set_feat loop is earlier in builtin-record.c, not the
> >> util code.
> >>
> > Yeah. Arnaldo simplified the original code in builtin-record.c to have the loop
> > instead of individual set_feat().
> >
> >> David

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-03-02 19:08               ` Arnaldo Carvalho de Melo
@ 2012-03-03 19:43                 ` Arnaldo Carvalho de Melo
  2012-03-05 10:49                   ` Ingo Molnar
  0 siblings, 1 reply; 37+ messages in thread
From: Arnaldo Carvalho de Melo @ 2012-03-03 19:43 UTC (permalink / raw)
  To: Ingo Molnar, Stephane Eranian
  Cc: David Ahern, linux-kernel, peterz, ravitillo, khandual, asharma,
	robert.richter, ming.m.lin, vweaver1, andi

Em Fri, Mar 02, 2012 at 04:08:22PM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Fri, Mar 02, 2012 at 06:47:38PM +0100, Stephane Eranian escreveu:
> > Arnaldo,
> > 
> > What do you think about this patch?
> 
> I'm processing perf/core patches today, will get to this one and merge
> or let you know objections,

Ingo,

	What is the status of merging
http://git.kernel.org/?p=linux/kernel/git/peterz/queue.git;a=summary ?

	After that I can try merging Stephane's autho-detect patch,

- Arnaldo
 
> > On Fri, Feb 24, 2012 at 4:51 PM, Stephane Eranian <eranian@google.com> wrote:
> > > On Fri, Feb 24, 2012 at 4:49 PM, David Ahern <dsahern@gmail.com> wrote:
> > >> On 2/24/12 8:40 AM, Stephane Eranian wrote:
> > >>>
> > >>> No, I don't. Read the code carefully. The for loop sets all known feature
> > >>> bits.
> > >>> Then, the ones not necessary or unused are turned off individually.
> > >>
> > >>
> > >> Ok, I see now. The __set_feat loop is earlier in builtin-record.c, not the
> > >> util code.
> > >>
> > > Yeah. Arnaldo simplified the original code in builtin-record.c to have the loop
> > > instead of individual set_feat().
> > >
> > >> David

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-03-03 19:43                 ` Arnaldo Carvalho de Melo
@ 2012-03-05 10:49                   ` Ingo Molnar
  2012-03-05 11:11                     ` Peter Zijlstra
  0 siblings, 1 reply; 37+ messages in thread
From: Ingo Molnar @ 2012-03-05 10:49 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Stephane Eranian, David Ahern, linux-kernel, peterz, ravitillo,
	khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi


* Arnaldo Carvalho de Melo <acme@redhat.com> wrote:

> Em Fri, Mar 02, 2012 at 04:08:22PM -0300, Arnaldo Carvalho de Melo escreveu:
> > Em Fri, Mar 02, 2012 at 06:47:38PM +0100, Stephane Eranian escreveu:
> > > Arnaldo,
> > > 
> > > What do you think about this patch?
> > 
> > I'm processing perf/core patches today, will get to this one and merge
> > or let you know objections,
> 
> Ingo,
> 
> 	What is the status of merging
> http://git.kernel.org/?p=linux/kernel/git/peterz/queue.git;a=summary ?
> 
> 	After that I can try merging Stephane's autho-detect patch,

I guess I'll get this from Peter soonish?

I think it would be nice to have the tooling patches in the same 
queue as well, so that I can try it all out as one package 
before pushing it.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-03-05 10:49                   ` Ingo Molnar
@ 2012-03-05 11:11                     ` Peter Zijlstra
  0 siblings, 0 replies; 37+ messages in thread
From: Peter Zijlstra @ 2012-03-05 11:11 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Arnaldo Carvalho de Melo, Stephane Eranian, David Ahern,
	linux-kernel, ravitillo, khandual, asharma, robert.richter,
	ming.m.lin, vweaver1, andi

On Mon, 2012-03-05 at 11:49 +0100, Ingo Molnar wrote:
> * Arnaldo Carvalho de Melo <acme@redhat.com> wrote:
> 
> > Em Fri, Mar 02, 2012 at 04:08:22PM -0300, Arnaldo Carvalho de Melo escreveu:
> > > Em Fri, Mar 02, 2012 at 06:47:38PM +0100, Stephane Eranian escreveu:
> > > > Arnaldo,
> > > > 
> > > > What do you think about this patch?
> > > 
> > > I'm processing perf/core patches today, will get to this one and merge
> > > or let you know objections,
> > 
> > Ingo,
> > 
> > 	What is the status of merging
> > http://git.kernel.org/?p=linux/kernel/git/peterz/queue.git;a=summary ?
> > 
> > 	After that I can try merging Stephane's autho-detect patch,
> 
> I guess I'll get this from Peter soonish?
> 
> I think it would be nice to have the tooling patches in the same 
> queue as well, so that I can try it all out as one package 
> before pushing it.

Hmm,. I somehow thought you already had it since you complained about
that record vs report thing. Lemme resurrect that series then.. :/

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-02-24  9:40 [PATCH] perf report: auto-detect branch stack sampling mode Stephane Eranian
  2012-02-24 15:24 ` David Ahern
@ 2012-03-05 15:47 ` Ingo Molnar
  2012-03-05 15:50   ` Ingo Molnar
                     ` (2 more replies)
  1 sibling, 3 replies; 37+ messages in thread
From: Ingo Molnar @ 2012-03-05 15:47 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: linux-kernel, acme, peterz, dsahern, ravitillo, khandual, asharma,
	robert.richter, ming.m.lin, vweaver1, andi, Peter Zijlstra


Another usability bug of the new branch sampling feature is that 
typing the obvious:

  $ perf record -b ./myprog

does not do the obvious thing and default to 'any' but throws a 
*very* unhelpful generic perf error:

 usage: perf record [<options>] [<command>]
    or: perf record [<options>] -- <command> [<options>]

 ...

Really, we should do better than this. Only people who *want* to 
specify finer LBR filters should be forced to specify them.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-03-05 15:47 ` Ingo Molnar
@ 2012-03-05 15:50   ` Ingo Molnar
  2012-03-05 15:56     ` Ingo Molnar
  2012-03-05 15:52   ` [PATCH] perf report: auto-detect branch stack sampling mode Stephane Eranian
  2012-03-07 12:49   ` Stephane Eranian
  2 siblings, 1 reply; 37+ messages in thread
From: Ingo Molnar @ 2012-03-05 15:50 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: linux-kernel, acme, peterz, dsahern, ravitillo, khandual, asharma,
	robert.richter, ming.m.lin, vweaver1, andi, Peter Zijlstra


* Ingo Molnar <mingo@elte.hu> wrote:

> Really, we should do better than this. Only people who *want* 
> to specify finer LBR filters should be forced to specify them.

Another detail is that perf record outputs this message:

 [ perf record: Captured and wrote 1.699 MB perf.data (~74220 samples) ]

The '~74220 samples' should probably say '~74220 branch 
samples'. (And yes, I realize that it's not just branch samples 
but that's not a big issue.)

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-03-05 15:50   ` Ingo Molnar
@ 2012-03-05 15:56     ` Ingo Molnar
  2012-03-05 16:30       ` Peter Zijlstra
  0 siblings, 1 reply; 37+ messages in thread
From: Ingo Molnar @ 2012-03-05 15:56 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: linux-kernel, acme, peterz, dsahern, ravitillo, khandual, asharma,
	robert.richter, ming.m.lin, vweaver1, andi, Peter Zijlstra

A third usability bug is that for some reason a branch sampling 
'perf report' defaults to the stdio output on my testsystem - 
with --no-branch-stack it gives the regular tui.

Even specifying --tui explicitly does not restore the TUI. Is 
there any fundamental reason why the branch stack does not 
support the TUI?

A fourth bug is that regular profiling appears to be busted:

 aldebaran:> perf record -a sleep 1
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.026 MB perf.data (~1120 samples) ]

 aldebaran:~> perf report
 Warning: TUI interface not supported in branch mode
 selected -b but no branch data. Did you call perf record without -b?

Guys, why are you sending me these patches? The tooling side is 
utterly untested and very raw yet.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-03-05 15:56     ` Ingo Molnar
@ 2012-03-05 16:30       ` Peter Zijlstra
  2012-03-05 16:32         ` Stephane Eranian
  2012-03-05 20:35         ` Arnaldo Carvalho de Melo
  0 siblings, 2 replies; 37+ messages in thread
From: Peter Zijlstra @ 2012-03-05 16:30 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Stephane Eranian, linux-kernel, acme, dsahern, ravitillo,
	khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi

On Mon, 2012-03-05 at 16:56 +0100, Ingo Molnar wrote:
> A third usability bug is that for some reason a branch sampling 
> 'perf report' defaults to the stdio output on my testsystem - 
> with --no-branch-stack it gives the regular tui.
> 
> 
Ah, I would not have noticed that since I still explicitly build my perf
without TUI support. That stuff mostly just gets in the way.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-03-05 16:30       ` Peter Zijlstra
@ 2012-03-05 16:32         ` Stephane Eranian
  2012-03-05 17:20           ` Ingo Molnar
  2012-03-05 20:35         ` Arnaldo Carvalho de Melo
  1 sibling, 1 reply; 37+ messages in thread
From: Stephane Eranian @ 2012-03-05 16:32 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, linux-kernel, acme, dsahern, ravitillo, khandual,
	asharma, robert.richter, ming.m.lin, vweaver1, andi

On Mon, Mar 5, 2012 at 5:30 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Mon, 2012-03-05 at 16:56 +0100, Ingo Molnar wrote:
>> A third usability bug is that for some reason a branch sampling
>> 'perf report' defaults to the stdio output on my testsystem -
>> with --no-branch-stack it gives the regular tui.
>>
>>
> Ah, I would not have noticed that since I still explicitly build my perf
> without TUI support. That stuff mostly just gets in the way.

I tend to agree with Peter here.

There is no TUI support for branch as of now.

I will fix the other two bugs you reported.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-03-05 16:32         ` Stephane Eranian
@ 2012-03-05 17:20           ` Ingo Molnar
  0 siblings, 0 replies; 37+ messages in thread
From: Ingo Molnar @ 2012-03-05 17:20 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Peter Zijlstra, linux-kernel, acme, dsahern, ravitillo, khandual,
	asharma, robert.richter, ming.m.lin, vweaver1, andi


* Stephane Eranian <eranian@google.com> wrote:

> On Mon, Mar 5, 2012 at 5:30 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> > On Mon, 2012-03-05 at 16:56 +0100, Ingo Molnar wrote:
> >> A third usability bug is that for some reason a branch sampling
> >> 'perf report' defaults to the stdio output on my testsystem -
> >> with --no-branch-stack it gives the regular tui.
> >>
> >>
> > Ah, I would not have noticed that since I still explicitly 
> > build my perf without TUI support. That stuff mostly just 
> > gets in the way.
> 
> I tend to agree with Peter here.

I use the TUI most of the time (it allows me to navigate and 
look at annotated output easily - something the --stdio one 
cannot do), and I know at least two other kernel developers who 
prefer the TUI, so it's a YMMV thing.

> There is no TUI support for branch as of now.

Why? What would be needed to add proper TUI support?

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-03-05 16:30       ` Peter Zijlstra
  2012-03-05 16:32         ` Stephane Eranian
@ 2012-03-05 20:35         ` Arnaldo Carvalho de Melo
  2012-03-05 21:43           ` Arun Sharma
  1 sibling, 1 reply; 37+ messages in thread
From: Arnaldo Carvalho de Melo @ 2012-03-05 20:35 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Stephane Eranian, linux-kernel, dsahern, ravitillo,
	khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi

Em Mon, Mar 05, 2012 at 05:30:33PM +0100, Peter Zijlstra escreveu:
> On Mon, 2012-03-05 at 16:56 +0100, Ingo Molnar wrote:
> > A third usability bug is that for some reason a branch sampling 
> > 'perf report' defaults to the stdio output on my testsystem - 
> > with --no-branch-stack it gives the regular tui.

> Ah, I would not have noticed that since I still explicitly build my perf
> without TUI support. That stuff mostly just gets in the way.

Do you have any specific complaints about it?

There were changes to make it work as the stdio one when navigation keys
aren't used, i.e. color scheme, etc.

Its not there to be in the way, but to be useful, so any specific
complaints I would love to hear from you.

- Arnaldo

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-03-05 20:35         ` Arnaldo Carvalho de Melo
@ 2012-03-05 21:43           ` Arun Sharma
  2012-03-05 22:26             ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 37+ messages in thread
From: Arun Sharma @ 2012-03-05 21:43 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Peter Zijlstra, Ingo Molnar, Stephane Eranian, linux-kernel,
	dsahern, ravitillo, khandual, robert.richter, ming.m.lin,
	vweaver1, andi

[-- Attachment #1: Type: text/plain, Size: 930 bytes --]

On 3/5/12 12:35 PM, Arnaldo Carvalho de Melo wrote:
> Em Mon, Mar 05, 2012 at 05:30:33PM +0100, Peter Zijlstra escreveu:
>> On Mon, 2012-03-05 at 16:56 +0100, Ingo Molnar wrote:
>>> A third usability bug is that for some reason a branch sampling
>>> 'perf report' defaults to the stdio output on my testsystem -
>>> with --no-branch-stack it gives the regular tui.
>
>> Ah, I would not have noticed that since I still explicitly build my perf
>> without TUI support. That stuff mostly just gets in the way.
>
> Do you have any specific complaints about it?
>

I think --tui code paths have some bugs that cause it to SIGSEGV, while 
the --stdio paths don't. I think much of it has to do with how often 
that particular combination of command line options is used.

Here's an example:

# perf record -ag -- sleep 3
# perf report -G -s pid --tui # SIGSEGV
# perf report -G -s pid --stdio # works fine

Details attached.

  -Arun




[-- Attachment #2: perf-debug.txt --]
[-- Type: text/plain, Size: 4716 bytes --]

                                                                                                                                                                                                                  
Program received signal SIGSEGV, Segmentation fault.                                                                                                                                                              
[Switching to Thread 139975795275488 (LWP 13155)]                                                                                                                                                                 
symbol__inc_addr_samples (sym=0x91eb90, map=<value optimized out>, evidx=0, addr=120416) at util/annotate.c:73                                                                                                    
73      util/annotate.c: No such file or directory.                                                                                                                                                               
        in util/annotate.c                                                                                                                                                                                        
(gdb) bt                                                                                                                                                                                                          
#0  symbol__inc_addr_samples (sym=0x91eb90, map=<value optimized out>, evidx=0, addr=120416) at util/annotate.c:73                                                                                                
#1  0x0000000000410b5d in process_sample_event (tool=<value optimized out>, event=0x7f4ea28e26b0, sample=0x7fffcc1ca8f0, evsel=0x823ea0, machine=0x822290) at builtin-report.c:127                                
#2  0x00000000004433ca in flush_sample_queue (s=0x822230, tool=0x7fffcc1cc340) at util/session.c:528                                                                                                              
#3  0x0000000000444d16 in __perf_session__process_events (session=0x822230, data_offset=<value optimized out>, data_size=<value optimized out>, file_size=<value optimized out>, tool=0x7fffcc1cc340)             
    at util/session.c:1175                                                                                                                                                                                        
#4  0x0000000000445217 in perf_session__process_events (self=0x822230, tool=0x7fffcc1cc340) at util/session.c:1191                                                                                                
#5  0x000000000041015b in cmd_report (argc=0, argv=0x7fffcc1cc830, prefix=<value optimized out>) at builtin-report.c:311                                                                                          
#6  0x00000000004051b9 in handle_internal_command (argc=4, argv=0x7fffcc1cc830) at perf.c:273                                                                                                                     
#7  0x0000000000405623 in main (argc=4, argv=0x479218) at perf.c:388                                                                                                                                              
                                                                                                                                                                                      
(gdb) p /x sym->start                                                                                                                                                                                          
$5 = 0xffffffff8100fb74                                                                                                                                                                                           
(gdb) p /x addr
$6 = 0x1d660
(gdb) p offset
$7 = 2130762476

54 int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
55                              int evidx, u64 addr)
56 {
57         unsigned offset;
58         struct annotation *notes;
59         struct sym_hist *h;
60 
61         notes = symbol__annotation(sym);
62         if (notes->src == NULL)
63                 return -ENOMEM;
64 
65         pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr));
66 
67         if (addr >= sym->end)
68                 return 0;
69 
70         offset = addr - sym->start;
71         h = annotation__histogram(notes, evidx);
72         h->sum++;
73         h->addr[offset]++; <-- potential bad memory reference

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-03-05 21:43           ` Arun Sharma
@ 2012-03-05 22:26             ` Arnaldo Carvalho de Melo
  2012-03-05 23:35               ` Arun Sharma
  0 siblings, 1 reply; 37+ messages in thread
From: Arnaldo Carvalho de Melo @ 2012-03-05 22:26 UTC (permalink / raw)
  To: Arun Sharma
  Cc: Peter Zijlstra, Ingo Molnar, Stephane Eranian, linux-kernel,
	dsahern, ravitillo, khandual, robert.richter, ming.m.lin,
	vweaver1, andi

Em Mon, Mar 05, 2012 at 01:43:23PM -0800, Arun Sharma escreveu:
> On 3/5/12 12:35 PM, Arnaldo Carvalho de Melo wrote:
> >Em Mon, Mar 05, 2012 at 05:30:33PM +0100, Peter Zijlstra escreveu:
> >>On Mon, 2012-03-05 at 16:56 +0100, Ingo Molnar wrote:
> >Do you have any specific complaints about it?
> 
> I think --tui code paths have some bugs that cause it to SIGSEGV,
> while the --stdio paths don't. I think much of it has to do with how

We'll have the same problem with --gtk, i.e. as much as we librarise the
backend code, there will be bugs in the frontends, but then we'll fix as
in this case.

What I was more concerned was what usability problems Peter and Stephane
had with the TUI, i.e. any missing feature they like on --stdio, or any
annoyance wrt color schemes, key bindings, etc.

I made an effort to try and have the look and feel of the TUI to
resemble as much as possible the one in --stdio, while allowing
usability improvements not possible in the --stdio case, like:

. Folding/unfolding callchains
. Zooming by DSO, etc
. Going to annotate and back really quickly
. Initial support for navigation in the annotate window (just callq for
  now, but jumps, etc are on the plans)
. Toggling source code on/off on the annotation window

> often that particular combination of command line options is used.
> 
> Here's an example:
> 
> # perf record -ag -- sleep 3
> # perf report -G -s pid --tui # SIGSEGV

Ok, now this is a good report, I managed to reproduce and will work on a
fix, thanks,

- Arnaldo

> # perf report -G -s pid --stdio # works fine
> 
> Details attached.
> 
>  -Arun
> 
> 
> 

>                                                                                                                                                                                                                   
> Program received signal SIGSEGV, Segmentation fault.                                                                                                                                                              
> [Switching to Thread 139975795275488 (LWP 13155)]                                                                                                                                                                 
> symbol__inc_addr_samples (sym=0x91eb90, map=<value optimized out>, evidx=0, addr=120416) at util/annotate.c:73                                                                                                    
> 73      util/annotate.c: No such file or directory.                                                                                                                                                               
>         in util/annotate.c                                                                                                                                                                                        
> (gdb) bt                                                                                                                                                                                                          
> #0  symbol__inc_addr_samples (sym=0x91eb90, map=<value optimized out>, evidx=0, addr=120416) at util/annotate.c:73                                                                                                
> #1  0x0000000000410b5d in process_sample_event (tool=<value optimized out>, event=0x7f4ea28e26b0, sample=0x7fffcc1ca8f0, evsel=0x823ea0, machine=0x822290) at builtin-report.c:127                                
> #2  0x00000000004433ca in flush_sample_queue (s=0x822230, tool=0x7fffcc1cc340) at util/session.c:528                                                                                                              
> #3  0x0000000000444d16 in __perf_session__process_events (session=0x822230, data_offset=<value optimized out>, data_size=<value optimized out>, file_size=<value optimized out>, tool=0x7fffcc1cc340)             
>     at util/session.c:1175                                                                                                                                                                                        
> #4  0x0000000000445217 in perf_session__process_events (self=0x822230, tool=0x7fffcc1cc340) at util/session.c:1191                                                                                                
> #5  0x000000000041015b in cmd_report (argc=0, argv=0x7fffcc1cc830, prefix=<value optimized out>) at builtin-report.c:311                                                                                          
> #6  0x00000000004051b9 in handle_internal_command (argc=4, argv=0x7fffcc1cc830) at perf.c:273                                                                                                                     
> #7  0x0000000000405623 in main (argc=4, argv=0x479218) at perf.c:388                                                                                                                                              
>                                                                                                                                                                                       
> (gdb) p /x sym->start                                                                                                                                                                                          
> $5 = 0xffffffff8100fb74                                                                                                                                                                                           
> (gdb) p /x addr
> $6 = 0x1d660
> (gdb) p offset
> $7 = 2130762476
> 
> 54 int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
> 55                              int evidx, u64 addr)
> 56 {
> 57         unsigned offset;
> 58         struct annotation *notes;
> 59         struct sym_hist *h;
> 60 
> 61         notes = symbol__annotation(sym);
> 62         if (notes->src == NULL)
> 63                 return -ENOMEM;
> 64 
> 65         pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr));
> 66 
> 67         if (addr >= sym->end)
> 68                 return 0;
> 69 
> 70         offset = addr - sym->start;
> 71         h = annotation__histogram(notes, evidx);
> 72         h->sum++;
> 73         h->addr[offset]++; <-- potential bad memory reference


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-03-05 22:26             ` Arnaldo Carvalho de Melo
@ 2012-03-05 23:35               ` Arun Sharma
  2012-03-06  3:06                 ` Arnaldo Carvalho de Melo
  2012-03-06  6:25                 ` Ingo Molnar
  0 siblings, 2 replies; 37+ messages in thread
From: Arun Sharma @ 2012-03-05 23:35 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Peter Zijlstra, Ingo Molnar, Stephane Eranian, linux-kernel,
	dsahern, ravitillo, khandual, robert.richter, ming.m.lin,
	vweaver1, andi

Like you probably figured from my other mail, we deal with deeply nested 
callchains with unwieldy function names a lot -- thanks to C++ and 
template programming. --tui's collapsing/expanding functionality is 
quite useful to navigate that mess. I'm just taking this opportunity to 
get some attention focused on improving it :)

On 3/5/12 2:26 PM, Arnaldo Carvalho de Melo wrote:

>> Here's an example:
>>
>> # perf record -ag -- sleep 3
>> # perf report -G -s pid --tui # SIGSEGV
>
> Ok, now this is a good report, I managed to reproduce and will work on a
> fix, thanks,

Something like this seems to do it for me.

         offset = addr - sym->start;
+       len = sym->end - sym->start;
+       if (offset >= len)
+               return 0;
+

The other problem area seems to be callchains when using -p regexp -x 
options. I'll try to summarize problems there in a separate thread.

  -Arun

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-03-05 23:35               ` Arun Sharma
@ 2012-03-06  3:06                 ` Arnaldo Carvalho de Melo
  2012-03-06  6:27                   ` Ingo Molnar
  2012-03-06  6:25                 ` Ingo Molnar
  1 sibling, 1 reply; 37+ messages in thread
From: Arnaldo Carvalho de Melo @ 2012-03-06  3:06 UTC (permalink / raw)
  To: Arun Sharma
  Cc: Peter Zijlstra, Ingo Molnar, Stephane Eranian, linux-kernel,
	dsahern, ravitillo, khandual, robert.richter, ming.m.lin,
	vweaver1, andi

Em Mon, Mar 05, 2012 at 03:35:35PM -0800, Arun Sharma escreveu:
> Like you probably figured from my other mail, we deal with deeply
> nested callchains with unwieldy function names a lot -- thanks to C++
> and template programming. --tui's collapsing/expanding functionality
> is quite useful to navigate that mess. I'm just taking this
> opportunity to get some attention focused on improving it :)

Excellent!

I can think about other Zoom operations, like zooming into just the
entries where some specific function in its callchains, say the one
under the cursor, appears, etc.
 
> On 3/5/12 2:26 PM, Arnaldo Carvalho de Melo wrote:
> 
> >>Here's an example:
> >>
> >># perf record -ag -- sleep 3
> >># perf report -G -s pid --tui # SIGSEGV
> >
> >Ok, now this is a good report, I managed to reproduce and will work on a
> >fix, thanks,
> 
> Something like this seems to do it for me.
> 
>         offset = addr - sym->start;
> +       len = sym->end - sym->start;
> +       if (offset >= len)
> +               return 0;
> +

That is my fault, I should have added a BUG_ON() spitting out a
callchain in this case, as that function shouldn't be called if the
address is not within its range :-\
 
> The other problem area seems to be callchains when using -p regexp
> -x options. I'll try to summarize problems there in a separate
> thread.

Please do,

Thanks a lot!

- Arnaldo

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-03-06  3:06                 ` Arnaldo Carvalho de Melo
@ 2012-03-06  6:27                   ` Ingo Molnar
  0 siblings, 0 replies; 37+ messages in thread
From: Ingo Molnar @ 2012-03-06  6:27 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Arun Sharma, Peter Zijlstra, Stephane Eranian, linux-kernel,
	dsahern, ravitillo, khandual, robert.richter, ming.m.lin,
	vweaver1, andi


* Arnaldo Carvalho de Melo <acme@redhat.com> wrote:

> > Something like this seems to do it for me.
> > 
> >         offset = addr - sym->start;
> > +       len = sym->end - sym->start;
> > +       if (offset >= len)
> > +               return 0;
> > +
> 
> That is my fault, I should have added a BUG_ON() spitting out 
> a callchain in this case, as that function shouldn't be called 
> if the address is not within its range :-\

Btw., I'd suggest not doing a BUG_ON() but some less destructive 
warning: both symbol table errors and sample stream hickups are 
common and can lead to essentially arbitrary input data - we 
shouldn't crash on that.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-03-05 23:35               ` Arun Sharma
  2012-03-06  3:06                 ` Arnaldo Carvalho de Melo
@ 2012-03-06  6:25                 ` Ingo Molnar
  2012-03-07  1:57                   ` [RFC] perf report: Implement symbol filtering on TUI Namhyung Kim
  1 sibling, 1 reply; 37+ messages in thread
From: Ingo Molnar @ 2012-03-06  6:25 UTC (permalink / raw)
  To: Arun Sharma
  Cc: Arnaldo Carvalho de Melo, Peter Zijlstra, Stephane Eranian,
	linux-kernel, dsahern, ravitillo, khandual, robert.richter,
	ming.m.lin, vweaver1, andi


* Arun Sharma <asharma@fb.com> wrote:

> Something like this seems to do it for me.
> 
>         offset = addr - sym->start;
> +       len = sym->end - sym->start;
> +       if (offset >= len)
> +               return 0;
> +

It would be nice to not have such inconsistent sym entries to 
begin with - i.e. to filter in the symbol code, not in the GUI 
front-end code.

> The other problem area seems to be callchains when using -p 
> regexp -x options. I'll try to summarize problems there in a 
> separate thread.

Btw., I have a text/regex filtering feature request there going 
beyond the issue of parent filtering, I often would love to be 
able to filter the sampled function itself:

  perf report sched

or:

  perf report time

or:

  perf report perf

to only see the list of (kernel) functions whose name name 
matches those patterns. (and skip all other functions)

Especially when I want to improve the tail portion of the 
profile this would be pretty useful. Today I can only do that 
with --stdio:

  perf report | grep sched

The -S option is too strict, it only allows individual symbols, 
no filters. Also, I hate typing '-S' ;-)

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [RFC] perf report: Implement symbol filtering on TUI
  2012-03-06  6:25                 ` Ingo Molnar
@ 2012-03-07  1:57                   ` Namhyung Kim
  2012-03-07  6:07                     ` Ingo Molnar
  2012-03-14 23:11                     ` Arun Sharma
  0 siblings, 2 replies; 37+ messages in thread
From: Namhyung Kim @ 2012-03-07  1:57 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Namhyung Kim, Arun Sharma, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern,
	ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi

As Ingo requested, symbol filtering feature was missing on TUI.
Add 's' key to get input from user, and do simple filtering by
strstr(). To turn filtering off, just enter no name by pressing
's' followed by ENTER.

There should be many issues, but I just want to release this
to get some feedbacks.

Signed-off-by: Namhyung Kim <namhyung.kim@lge.com>
---
 tools/perf/builtin-report.c         |   21 +++++++--
 tools/perf/util/hist.c              |   31 ++++++++++++++
 tools/perf/util/hist.h              |    2 +
 tools/perf/util/ui/browser.h        |    2 +
 tools/perf/util/ui/browsers/hists.c |   17 +++++++-
 tools/perf/util/ui/keysyms.h        |    3 +
 tools/perf/util/ui/util.c           |   78 +++++++++++++++++++++++++++++++++++
 7 files changed, 148 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 25d34d483e49..64878b63f4f8 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -50,6 +50,7 @@ struct perf_report {
 	const char		*pretty_printing_style;
 	symbol_filter_t		annotate_init;
 	const char		*cpu_list;
+	const char		*symbol_filter_str;
 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 };
 
@@ -326,6 +327,11 @@ static int __cmd_report(struct perf_report *rep)
 	}
 
 	if (use_browser > 0) {
+		struct perf_evsel *first;
+		first = list_entry(session->evlist->entries.next,
+				   struct perf_evsel, node);
+		first->hists.symbol_filter_str = rep->symbol_filter_str;
+
 		perf_evlist__tui_browse_hists(session->evlist, help,
 					      NULL, NULL, 0);
 	} else
@@ -586,11 +592,16 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
 	} else
 		symbol_conf.exclude_other = false;
 
-	/*
-	 * Any (unrecognized) arguments left?
-	 */
-	if (argc)
-		usage_with_options(report_usage, options);
+	if (argc) {
+		/*
+		 * Special case: if there's an argument left then assume tha
+		 * it's a symbol filter:
+		 */
+		if (argc > 1)
+			usage_with_options(report_usage, options);
+
+		report.symbol_filter_str = argv[0];
+	}
 
 	sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
 	sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 6f505d1abac7..56f19d86b0cd 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -15,6 +15,7 @@ enum hist_filter {
 	HIST_FILTER__DSO,
 	HIST_FILTER__THREAD,
 	HIST_FILTER__PARENT,
+	HIST_FILTER__SYMBOL,
 };
 
 struct callchain_param	callchain_param = {
@@ -1179,6 +1180,36 @@ void hists__filter_by_thread(struct hists *hists)
 	}
 }
 
+static bool hists__filter_entry_by_symbol(struct hists *hists,
+					  struct hist_entry *he)
+{
+	if (hists->symbol_filter_str != NULL && he->ms.sym &&
+	    strstr(he->ms.sym->name, hists->symbol_filter_str) == NULL) {
+		he->filtered |= (1 << HIST_FILTER__SYMBOL);
+		return true;
+	}
+
+	return false;
+}
+
+void hists__filter_by_symbol(struct hists *hists)
+{
+	struct rb_node *nd;
+
+	hists->nr_entries = hists->stats.total_period = 0;
+	hists->stats.nr_events[PERF_RECORD_SAMPLE] = 0;
+	hists__reset_col_len(hists);
+
+	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+		if (hists__filter_entry_by_symbol(hists, h))
+			continue;
+
+		hists__remove_entry_filter(hists, h, HIST_FILTER__SYMBOL);
+	}
+}
+
 int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip)
 {
 	return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 48e5acd1e862..020d3477cabc 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -57,6 +57,7 @@ struct hists {
 	const struct thread	*thread_filter;
 	const struct dso	*dso_filter;
 	const char		*uid_filter_str;
+	const char		*symbol_filter_str;
 	pthread_mutex_t		lock;
 	struct events_stats	stats;
 	u64			event_stream;
@@ -96,6 +97,7 @@ int hist_entry__annotate(struct hist_entry *self, size_t privsize);
 
 void hists__filter_by_dso(struct hists *hists);
 void hists__filter_by_thread(struct hists *hists);
+void hists__filter_by_symbol(struct hists *hists);
 
 u16 hists__col_len(struct hists *self, enum hist_column col);
 void hists__set_col_len(struct hists *self, enum hist_column col, u16 len);
diff --git a/tools/perf/util/ui/browser.h b/tools/perf/util/ui/browser.h
index 84d761b730c1..6ee82f60feaf 100644
--- a/tools/perf/util/ui/browser.h
+++ b/tools/perf/util/ui/browser.h
@@ -49,6 +49,8 @@ int ui_browser__warning(struct ui_browser *browser, int timeout,
 			const char *format, ...);
 int ui_browser__help_window(struct ui_browser *browser, const char *text);
 bool ui_browser__dialog_yesno(struct ui_browser *browser, const char *text);
+int ui_browser__input_window(const char *title, const char *text, char *input,
+			     const char *exit_msg, int delay_sec);
 
 void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence);
 unsigned int ui_browser__argv_refresh(struct ui_browser *browser);
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c
index bfba0490c098..6a7c918e7b4c 100644
--- a/tools/perf/util/ui/browsers/hists.c
+++ b/tools/perf/util/ui/browsers/hists.c
@@ -863,6 +863,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 	struct hist_browser *browser = hist_browser__new(self);
 	struct pstack *fstack;
 	int key = -1;
+	char buf[64];
 
 	if (browser == NULL)
 		return -1;
@@ -871,6 +872,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 	if (fstack == NULL)
 		goto out;
 
+	hists__filter_by_symbol(self);
 	ui_helpline__push(helpline);
 
 	while (1) {
@@ -915,6 +917,18 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 			goto zoom_dso;
 		case 't':
 			goto zoom_thread;
+		case 's':
+			if (ui_browser__input_window("Symbol to show",
+					"Please enter the name of symbol you want to see",
+					buf, "ENTER: OK, ESC: Cancel",
+					delay_secs * 2) == K_ENTER) {
+				self->symbol_filter_str = buf;
+				ui_helpline__fpush("filtered by \"%s\"",
+						   self->symbol_filter_str);
+			}
+			hists__filter_by_symbol(self);
+			hist_browser__reset(browser);
+			continue;
 		case K_F1:
 		case 'h':
 		case '?':
@@ -932,7 +946,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 					"C             Collapse all callchains\n"
 					"E             Expand all callchains\n"
 					"d             Zoom into current DSO\n"
-					"t             Zoom into current Thread");
+					"t             Zoom into current Thread\n"
+					"s             Filter symbol by name");
 			continue;
 		case K_ENTER:
 		case K_RIGHT:
diff --git a/tools/perf/util/ui/keysyms.h b/tools/perf/util/ui/keysyms.h
index 3458b1985761..7bc34084d663 100644
--- a/tools/perf/util/ui/keysyms.h
+++ b/tools/perf/util/ui/keysyms.h
@@ -16,6 +16,9 @@
 #define K_TAB	'\t'
 #define K_UNTAB	SL_KEY_UNTAB
 #define K_UP	SL_KEY_UP
+//#define K_BKSPC SL_KEY_BACKSPACE
+#define K_BKSPC 0x7f
+#define K_DEL	SL_KEY_DELETE
 
 /* Not really keys */
 #define K_TIMER	 -1
diff --git a/tools/perf/util/ui/util.c b/tools/perf/util/ui/util.c
index 45daa7c41dad..360f43fd5400 100644
--- a/tools/perf/util/ui/util.c
+++ b/tools/perf/util/ui/util.c
@@ -69,6 +69,84 @@ int ui__popup_menu(int argc, char * const argv[])
 	return popup_menu__run(&menu);
 }
 
+int ui_browser__input_window(const char *title, const char *text, char *input,
+			     const char *exit_msg, int delay_secs)
+{
+	int x, y, len, key;
+	int max_len = 60, nr_lines = 0;
+	static char buf[50];
+	const char *t;
+
+	t = text;
+	while (1) {
+		const char *sep = strchr(t, '\n');
+
+		if (sep == NULL)
+			sep = strchr(t, '\0');
+		len = sep - t;
+		if (max_len < len)
+			max_len = len;
+		++nr_lines;
+		if (*sep == '\0')
+			break;
+		t = sep + 1;
+	}
+
+	max_len += 2;
+	nr_lines += 8;
+	y = SLtt_Screen_Rows / 2 - nr_lines / 2;
+	x = SLtt_Screen_Cols / 2 - max_len / 2;
+
+	SLsmg_set_color(0);
+	SLsmg_draw_box(y, x++, nr_lines, max_len);
+	if (title) {
+		SLsmg_gotorc(y, x + 1);
+		SLsmg_write_string((char *)title);
+	}
+	SLsmg_gotorc(++y, x);
+	nr_lines -= 7;
+	max_len -= 2;
+	SLsmg_write_wrapped_string((unsigned char *)text, y, x,
+				   nr_lines, max_len, 1);
+	y += nr_lines + 1;
+	SLsmg_set_color(0);
+	SLsmg_draw_box(y - 1, x + 1, 3, max_len - 2);
+
+	SLsmg_gotorc(y + 3, x);
+	SLsmg_write_nstring((char *)exit_msg, max_len);
+	SLsmg_refresh();
+
+	x += 2;
+	len = 0;
+	key = ui__getch(delay_secs);
+	while (key != K_TIMER && key != K_ENTER && key != K_ESC) {
+		if (key == K_BKSPC) {
+			if (len == 0)
+				goto next_key;
+			SLsmg_gotorc(y, x + --len);
+			SLsmg_write_char(' ');
+		} else {
+			buf[len] = key;
+			SLsmg_gotorc(y, x + len++);
+			SLsmg_write_char(key);
+		}
+		SLsmg_refresh();
+
+		/* XXX more graceful overflow handling needed */
+		if (len == sizeof(buf) - 1) {
+			ui_helpline__push("maximum size of symbol name reached!");
+			key = K_ENTER;
+			break;
+		}
+next_key:
+		key = ui__getch(delay_secs);
+	}
+
+	buf[len] = '\0';
+	strncpy(input, buf, len+1);
+	return key;
+}
+
 int ui__question_window(const char *title, const char *text,
 			const char *exit_msg, int delay_secs)
 {
-- 
1.7.9


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [RFC] perf report: Implement symbol filtering on TUI
  2012-03-07  1:57                   ` [RFC] perf report: Implement symbol filtering on TUI Namhyung Kim
@ 2012-03-07  6:07                     ` Ingo Molnar
  2012-03-07  8:04                       ` Namhyung Kim
  2012-03-14 23:11                     ` Arun Sharma
  1 sibling, 1 reply; 37+ messages in thread
From: Ingo Molnar @ 2012-03-07  6:07 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Namhyung Kim, Arun Sharma, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern,
	ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi


* Namhyung Kim <namhyung.kim@lge.com> wrote:

> As Ingo requested, symbol filtering feature was missing on TUI.
> Add 's' key to get input from user, and do simple filtering by
> strstr(). To turn filtering off, just enter no name by pressing
> 's' followed by ENTER.
> 
> There should be many issues, but I just want to release this
> to get some feedbacks.

I'd love it if in addition to the hotkey, if I typed the obvious 
sequence:

 $ perf report sched

... then it would turn into such a filter automagically.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC] perf report: Implement symbol filtering on TUI
  2012-03-07  6:07                     ` Ingo Molnar
@ 2012-03-07  8:04                       ` Namhyung Kim
  2012-03-08 10:44                         ` Ingo Molnar
  0 siblings, 1 reply; 37+ messages in thread
From: Namhyung Kim @ 2012-03-07  8:04 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Namhyung Kim, Arun Sharma, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern,
	ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi

Hi, Ingo

2012-03-07 3:07 PM, Ingo Molnar wrote:
>
> * Namhyung Kim <namhyung.kim@lge.com>  wrote:
>
>> As Ingo requested, symbol filtering feature was missing on TUI.
>> Add 's' key to get input from user, and do simple filtering by
>> strstr(). To turn filtering off, just enter no name by pressing
>> 's' followed by ENTER.
>>
>> There should be many issues, but I just want to release this
>> to get some feedbacks.
>
> I'd love it if in addition to the hotkey, if I typed the obvious
> sequence:
>
>   $ perf report sched
>
> ... then it would turn into such a filter automagically.
>

Oh, I implemented that already. Please test it! :)

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC] perf report: Implement symbol filtering on TUI
  2012-03-07  8:04                       ` Namhyung Kim
@ 2012-03-08 10:44                         ` Ingo Molnar
  2012-03-09  1:53                           ` Namhyung Kim
  0 siblings, 1 reply; 37+ messages in thread
From: Ingo Molnar @ 2012-03-08 10:44 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Namhyung Kim, Arun Sharma, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern,
	ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi


* Namhyung Kim <namhyung.kim@lge.com> wrote:

> Hi, Ingo
> 
> 2012-03-07 3:07 PM, Ingo Molnar wrote:
> >
> >* Namhyung Kim <namhyung.kim@lge.com>  wrote:
> >
> >>As Ingo requested, symbol filtering feature was missing on TUI.
> >>Add 's' key to get input from user, and do simple filtering by
> >>strstr(). To turn filtering off, just enter no name by pressing
> >>'s' followed by ENTER.
> >>
> >>There should be many issues, but I just want to release this
> >>to get some feedbacks.
> >
> >I'd love it if in addition to the hotkey, if I typed the obvious
> >sequence:
> >
> >  $ perf report sched
> >
> >... then it would turn into such a filter automagically.
> >
> 
> Oh, I implemented that already. Please test it! :)

Cool - I tried it out and it works just as it should!

I noticed two details:

 - "perf report sched | less" does not work as expected - such 
   kinds of features should be GUI-frontend agnostic.

 - unknown symbols are not matched, and thus they will show up 
   indiscrimnately even though I only want to see them if the 
  filter is something like '0x' or 'unknown'.

Anyway, apart from these two details:

 Tested-by: Ingo Molnar <mingo@elte.hu>

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC] perf report: Implement symbol filtering on TUI
  2012-03-08 10:44                         ` Ingo Molnar
@ 2012-03-09  1:53                           ` Namhyung Kim
  2012-03-09  7:36                             ` Ingo Molnar
  0 siblings, 1 reply; 37+ messages in thread
From: Namhyung Kim @ 2012-03-09  1:53 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Namhyung Kim, Arun Sharma, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern,
	ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi

Hi,

2012-03-08 7:44 PM, Ingo Molnar wrote:
>
> * Namhyung Kim <namhyung.kim@lge.com>  wrote:
>
>> Hi, Ingo
>>
>> 2012-03-07 3:07 PM, Ingo Molnar wrote:
>>>
>>> * Namhyung Kim <namhyung.kim@lge.com>   wrote:
>>>
>>>> As Ingo requested, symbol filtering feature was missing on TUI.
>>>> Add 's' key to get input from user, and do simple filtering by
>>>> strstr(). To turn filtering off, just enter no name by pressing
>>>> 's' followed by ENTER.
>>>>
>>>> There should be many issues, but I just want to release this
>>>> to get some feedbacks.
>>>
>>> I'd love it if in addition to the hotkey, if I typed the obvious
>>> sequence:
>>>
>>>   $ perf report sched
>>>
>>> ... then it would turn into such a filter automagically.
>>>
>>
>> Oh, I implemented that already. Please test it! :)
>
> Cool - I tried it out and it works just as it should!
>
> I noticed two details:
>
>   - "perf report sched | less" does not work as expected - such
>     kinds of features should be GUI-frontend agnostic.
>

Will fix.


>   - unknown symbols are not matched, and thus they will show up
>     indiscrimnately even though I only want to see them if the
>    filter is something like '0x' or 'unknown'.
>

Since they have no symbol. :) In the current implementation, it will
actually show you such symbols if you enter '0x' or 'unknown' as a filter 
unless there're symbols that have those letters in its name.

I can think of 3 solutions for this now:

1. Adding a special filter keyword (like 'unknown'). But there's probably some 
symbols which have those letters.

2. If filter string consists of (hex-) digits only, it will only show hist 
entries doesn't have symbols, or tries to match based on its ip.

3. Implement zooming-in to "unknown" dso. Maybe it's a different issue, but I 
think it's good to have and it'll helps this too.

What do you guys think?


> Anyway, apart from these two details:
>
>   Tested-by: Ingo Molnar <mingo@elte.hu>
>

Thanks for testing and suggestions.
Namhyung



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC] perf report: Implement symbol filtering on TUI
  2012-03-09  1:53                           ` Namhyung Kim
@ 2012-03-09  7:36                             ` Ingo Molnar
  2012-03-09  8:03                               ` Namhyung Kim
  0 siblings, 1 reply; 37+ messages in thread
From: Ingo Molnar @ 2012-03-09  7:36 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Namhyung Kim, Arun Sharma, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern,
	ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi


* Namhyung Kim <namhyung.kim@lge.com> wrote:

> >  - unknown symbols are not matched, and thus they will show up
> >    indiscrimnately even though I only want to see them if the
> >   filter is something like '0x' or 'unknown'.
> >
> 
> Since they have no symbol. :) In the current implementation, 
> it will actually show you such symbols if you enter '0x' or 
> 'unknown' as a filter unless there're symbols that have those 
> letters in its name.
> 
> I can think of 3 solutions for this now:
> 
> 1. Adding a special filter keyword (like 'unknown'). But there's
> probably some symbols which have those letters.
> 
> 2. If filter string consists of (hex-) digits only, it will only
> show hist entries doesn't have symbols, or tries to match based on
> its ip.
> 
> 3. Implement zooming-in to "unknown" dso. Maybe it's a different
> issue, but I think it's good to have and it'll helps this too.
> 
> What do you guys think?

Well, the main problem I had was that they showed up in the 
'perf report sched' filtered output and messed it up.

I.e. I'm not interested in unknown symbols much, I'm interested 
in *not* seeing them when I type 'perf report sched'.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC] perf report: Implement symbol filtering on TUI
  2012-03-09  7:36                             ` Ingo Molnar
@ 2012-03-09  8:03                               ` Namhyung Kim
  0 siblings, 0 replies; 37+ messages in thread
From: Namhyung Kim @ 2012-03-09  8:03 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Namhyung Kim, Arun Sharma, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern,
	ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi

Hi,

2012-03-09 4:36 PM, Ingo Molnar wrote:
> Well, the main problem I had was that they showed up in the
> 'perf report sched' filtered output and messed it up.
>
> I.e. I'm not interested in unknown symbols much, I'm interested
> in *not* seeing them when I type 'perf report sched'.
>

It should be easy to get rid of unknown symbols from filtered output. So I 
just wanted how to deal with if someone would need to show (part of) those 
symbols only. But now I guess it could be handled at dso/comm level.

Thanks,
Namhyung


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC] perf report: Implement symbol filtering on TUI
  2012-03-07  1:57                   ` [RFC] perf report: Implement symbol filtering on TUI Namhyung Kim
  2012-03-07  6:07                     ` Ingo Molnar
@ 2012-03-14 23:11                     ` Arun Sharma
  2012-03-15  0:44                       ` Namhyung Kim
  1 sibling, 1 reply; 37+ messages in thread
From: Arun Sharma @ 2012-03-14 23:11 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Ingo Molnar, Namhyung Kim, Arun Sharma, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern,
	ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi

On Wed, Mar 07, 2012 at 10:57:50AM +0900, Namhyung Kim wrote:
> As Ingo requested, symbol filtering feature was missing on TUI.
> Add 's' key to get input from user, and do simple filtering by
> strstr(). To turn filtering off, just enter no name by pressing
> 's' followed by ENTER.

Why not do this for --stdio as well?

perf report foo -g graph,0.5,caller -s inclusive --stdio

will print only the callgraph under foo.

This works better for me than:

perf report -s parent -p ^c$ --stdio

 -Arun

index 94394f3..607b21b 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -219,6 +219,8 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
 		struct hists *hists = &pos->hists;
 		const char *evname = event_name(pos);
 
+		hists->symbol_filter_str = rep->symbol_filter_str;
+		hists__filter_by_symbol(hists);
 		hists__fprintf_nr_sample_events(hists, evname, stdout);
 		hists__fprintf(hists, NULL, false, true, 0, 0, stdout);
 		fprintf(stdout, "\n\n");



^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [RFC] perf report: Implement symbol filtering on TUI
  2012-03-14 23:11                     ` Arun Sharma
@ 2012-03-15  0:44                       ` Namhyung Kim
  2012-03-15 21:46                         ` Arun Sharma
  0 siblings, 1 reply; 37+ messages in thread
From: Namhyung Kim @ 2012-03-15  0:44 UTC (permalink / raw)
  To: Arun Sharma
  Cc: Ingo Molnar, Namhyung Kim, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern,
	ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi

Hi,

2012-03-15 8:11 AM, Arun Sharma wrote:
> On Wed, Mar 07, 2012 at 10:57:50AM +0900, Namhyung Kim wrote:
>> As Ingo requested, symbol filtering feature was missing on TUI.
>> Add 's' key to get input from user, and do simple filtering by
>> strstr(). To turn filtering off, just enter no name by pressing
>> 's' followed by ENTER.
>
> Why not do this for --stdio as well?
>
> perf report foo -g graph,0.5,caller -s inclusive --stdio
>
> will print only the callgraph under foo.
>
> This works better for me than:
>
> perf report -s parent -p ^c$ --stdio
>
>   -Arun

Thanks for reviewing and sending the patch.

However this was already fixed on my new patch set. Please see below:

https://lkml.org/lkml/2012/3/13/73


Thanks,
Namhyung


>
> index 94394f3..607b21b 100644
> --- a/tools/perf/builtin-report.c
> +++ b/tools/perf/builtin-report.c
> @@ -219,6 +219,8 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
>   		struct hists *hists =&pos->hists;
>   		const char *evname = event_name(pos);
>
> +		hists->symbol_filter_str = rep->symbol_filter_str;
> +		hists__filter_by_symbol(hists);
>   		hists__fprintf_nr_sample_events(hists, evname, stdout);
>   		hists__fprintf(hists, NULL, false, true, 0, 0, stdout);
>   		fprintf(stdout, "\n\n");
>
>


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC] perf report: Implement symbol filtering on TUI
  2012-03-15  0:44                       ` Namhyung Kim
@ 2012-03-15 21:46                         ` Arun Sharma
  0 siblings, 0 replies; 37+ messages in thread
From: Arun Sharma @ 2012-03-15 21:46 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Ingo Molnar, Namhyung Kim, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern,
	ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi

On 3/14/12 5:44 PM, Namhyung Kim wrote:

> Thanks for reviewing and sending the patch.
>
> However this was already fixed on my new patch set. Please see below:
>
> https://lkml.org/lkml/2012/3/13/73
>

I still get different behaviors with and without my two line patch.

Try:

perf record -g
perf report c -G -g graph,0.5,caller -s inclusive --stdio

I don't see where hists__filter_by_symbol() gets called when using --stdio.

But let's examine what I think is a more important general issue (not 
specific to your patch):

Here's the behavior my coworkers are requesting. With the test program I 
mailed earlier with this callgraph:

./perf report -G -g graph,0.5,caller -s pid --stdio

   100.00%            foo:30804
             |
             --- __libc_start_main
                |
                 --99.87%-- main
                           |
                           |--50.01%-- a
                           |          |
                           |           --42.71%-- c
                           |                     |
                           |                     |--14.36%-- e
                           |                     |          |
                           |                     |           --7.09%-- f
                           |                     |
                           |                      --14.33%-- d
                           |                                |
                           |                                 --6.81%-- f
                           |
                            --49.86%-- b
                                      |
                                       --42.72%-- c
                                                 |
                                                 |--14.43%-- d
                                                 |          |
                                                 |           --7.30%-- f
                                                 |
                                                  --14.13%-- e
                                                            |
                                                             --7.05%-- f

If the user filters by "d", we should:

a) Construct a callgraph that has only callchains containing "d"
b) Express percentages as a fraction of the unfiltered total_period
c) Allow users to expand callchains and see both the callers and callees 
of d.

Right now, I can do this in two steps:

# Get the callgraph leading to "d"
perf report -S d -G -g graph,0.5,callee

  100.00%      foo  foo
                 |
                 --- __libc_start_main
                     main
                    |
                    |--51.37%-- a
                    |          c
                    |          d
                    |
                     --48.63%-- b
                               c
                               d

# Get the callgraph below "d" (i.e. where d is the caller)

perf report d -G -g graph,0.5,caller -s inclusive

    95.56%  [.] d
             |
             --- d
                |
                 --46.88%-- f

and the percentages are not expressed as a percentages of the total_period.

I'll send out a RFC patch next.

  -Arun

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-03-05 15:47 ` Ingo Molnar
  2012-03-05 15:50   ` Ingo Molnar
@ 2012-03-05 15:52   ` Stephane Eranian
  2012-03-07 12:49   ` Stephane Eranian
  2 siblings, 0 replies; 37+ messages in thread
From: Stephane Eranian @ 2012-03-05 15:52 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, acme, peterz, dsahern, ravitillo, khandual, asharma,
	robert.richter, ming.m.lin, vweaver1, andi, Peter Zijlstra

On Mon, Mar 5, 2012 at 4:47 PM, Ingo Molnar <mingo@elte.hu> wrote:
>
>
> Another usability bug of the new branch sampling feature is that
> typing the obvious:
>
>  $ perf record -b ./myprog
>
> does not do the obvious thing and default to 'any' but throws a
> *very* unhelpful generic perf error:
>
Fair enough. I can add code  to handle this case.

>
>  usage: perf record [<options>] [<command>]
>    or: perf record [<options>] -- <command> [<options>]
>
>  ...
>
> Really, we should do better than this. Only people who *want* to
> specify finer LBR filters should be forced to specify them.
>
> Thanks,
>
>        Ingo

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] perf report: auto-detect branch stack sampling mode
  2012-03-05 15:47 ` Ingo Molnar
  2012-03-05 15:50   ` Ingo Molnar
  2012-03-05 15:52   ` [PATCH] perf report: auto-detect branch stack sampling mode Stephane Eranian
@ 2012-03-07 12:49   ` Stephane Eranian
  2 siblings, 0 replies; 37+ messages in thread
From: Stephane Eranian @ 2012-03-07 12:49 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, acme, peterz, dsahern, ravitillo, khandual, asharma,
	robert.richter, ming.m.lin, vweaver1, andi, Peter Zijlstra

On Mon, Mar 5, 2012 at 4:47 PM, Ingo Molnar <mingo@elte.hu> wrote:
>
> Another usability bug of the new branch sampling feature is that
> typing the obvious:
>
>  $ perf record -b ./myprog
>
> does not do the obvious thing and default to 'any' but throws a
> *very* unhelpful generic perf error:
>
Ok, so looked at this. Given the option parsing code, looks like
we would have to split the option in two: -b and something else
to disambiguate cmdline such as:

$ perf record -b foo
$ perf record -b any_call foo
any_call: no such file or directory

The code cannot disambiguate between any_call being a branch filter
vs. the command
to run, given that -b has now an optional argument.

What we can do is split: -b and --branch-filter for instance. The former
takes no argument and sets up branch-stack to ANY. The other one requires
a parameter.

I am almost done with  TUI navigation of branch samples.

>  usage: perf record [<options>] [<command>]
>    or: perf record [<options>] -- <command> [<options>]
>
>  ...
>
> Really, we should do better than this. Only people who *want* to
> specify finer LBR filters should be forced to specify them.
>
> Thanks,
>
>        Ingo

^ permalink raw reply	[flat|nested] 37+ messages in thread

end of thread, other threads:[~2012-03-15 21:46 UTC | newest]

Thread overview: 37+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-02-24  9:40 [PATCH] perf report: auto-detect branch stack sampling mode Stephane Eranian
2012-02-24 15:24 ` David Ahern
2012-02-24 15:28   ` Stephane Eranian
2012-02-24 15:31     ` David Ahern
2012-02-24 15:40       ` Stephane Eranian
2012-02-24 15:49         ` David Ahern
2012-02-24 15:51           ` Stephane Eranian
2012-03-02 17:47             ` Stephane Eranian
2012-03-02 19:08               ` Arnaldo Carvalho de Melo
2012-03-03 19:43                 ` Arnaldo Carvalho de Melo
2012-03-05 10:49                   ` Ingo Molnar
2012-03-05 11:11                     ` Peter Zijlstra
2012-03-05 15:47 ` Ingo Molnar
2012-03-05 15:50   ` Ingo Molnar
2012-03-05 15:56     ` Ingo Molnar
2012-03-05 16:30       ` Peter Zijlstra
2012-03-05 16:32         ` Stephane Eranian
2012-03-05 17:20           ` Ingo Molnar
2012-03-05 20:35         ` Arnaldo Carvalho de Melo
2012-03-05 21:43           ` Arun Sharma
2012-03-05 22:26             ` Arnaldo Carvalho de Melo
2012-03-05 23:35               ` Arun Sharma
2012-03-06  3:06                 ` Arnaldo Carvalho de Melo
2012-03-06  6:27                   ` Ingo Molnar
2012-03-06  6:25                 ` Ingo Molnar
2012-03-07  1:57                   ` [RFC] perf report: Implement symbol filtering on TUI Namhyung Kim
2012-03-07  6:07                     ` Ingo Molnar
2012-03-07  8:04                       ` Namhyung Kim
2012-03-08 10:44                         ` Ingo Molnar
2012-03-09  1:53                           ` Namhyung Kim
2012-03-09  7:36                             ` Ingo Molnar
2012-03-09  8:03                               ` Namhyung Kim
2012-03-14 23:11                     ` Arun Sharma
2012-03-15  0:44                       ` Namhyung Kim
2012-03-15 21:46                         ` Arun Sharma
2012-03-05 15:52   ` [PATCH] perf report: auto-detect branch stack sampling mode Stephane Eranian
2012-03-07 12:49   ` Stephane Eranian

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).