* [PATCH] perf report: auto-detect branch stack sampling mode
@ 2012-02-24 9:40 Stephane Eranian
2012-02-24 15:24 ` David Ahern
2012-03-05 15:47 ` Ingo Molnar
0 siblings, 2 replies; 37+ messages in thread
From: Stephane Eranian @ 2012-02-24 9:40 UTC (permalink / raw)
To: linux-kernel
Cc: acme, peterz, mingo, dsahern, ravitillo, khandual, asharma,
robert.richter, ming.m.lin, vweaver1, andi
This patch adds auto-detection of samples with taken branch stacks.
The auto-detection avoids having to specify the -b or --branch-stack
option on the cmdline.
The patch adds a new feature bit HEADER_BRANCH_STACK to mark the
presence of branch stacks in samples.
You can now do:
$ perf record -b any noploop 2
$ perf report
# Events: 8K cycles
#
# Overhead Command Source Shared Object Source Symbol Target Shared Object Target Symbol
# ........ ....... .................... ................... .................... ..................
#
91.56% noploop noploop [.] noploop noploop [.] noploop
0.42% noploop [kernel.kallsyms] [k] __lock_acquire [kernel.kallsyms] [k] __lock_acquire
To force regular reporting based on the instruction address:
$ perf report --no-branch-stack
#
# Events: 2K cycles
#
# Overhead Command Shared Object Symbol
# ........ ....... ................. ...............................
#
92.03% noploop noploop [.] noploop
1.00% noploop [kernel.kallsyms] [k] lock_acquire
Signed-off-by: Stephane Eranian <eranian@google.com>
---
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 1c49d4e..5e833a2 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
if (!have_tracepoints(&evsel_list->entries))
perf_header__clear_feat(&session->header, HEADER_TRACE_INFO);
+ if (!rec->opts.branch_stack)
+ perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
+
if (!rec->file_new) {
err = perf_session__read_header(session, output);
if (err < 0)
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 528789f..edd4289 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -306,21 +306,14 @@ static int __cmd_report(struct perf_report *rep)
{
int ret = -EINVAL;
u64 nr_samples;
- struct perf_session *session;
struct perf_evsel *pos;
+ struct perf_session *session = rep->session;
struct map *kernel_map;
struct kmap *kernel_kmap;
const char *help = "For a higher level overview, try: perf report --sort comm,dso";
signal(SIGINT, sig_handler);
- session = perf_session__new(rep->input_name, O_RDONLY,
- rep->force, false, &rep->tool);
- if (session == NULL)
- return -ENOMEM;
-
- rep->session = session;
-
if (rep->cpu_list) {
ret = perf_session__cpu_bitmap(session, rep->cpu_list,
rep->cpu_bitmap);
@@ -489,7 +482,10 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
int cmd_report(int argc, const char **argv, const char *prefix __used)
{
+ struct perf_session *session;
struct stat st;
+ bool has_br_stack;
+ int ret = -1;
char callchain_default_opt[] = "fractal,0.5,callee";
const char * const report_usage[] = {
"perf report [<options>]",
@@ -600,7 +596,23 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
report.input_name = "perf.data";
}
- if (sort__branch_mode) {
+ session = perf_session__new(report.input_name, O_RDONLY,
+ report.force, false, &report.tool);
+ if (session == NULL)
+ return -ENOMEM;
+
+ report.session = session;
+
+ has_br_stack = perf_header__has_feat(&session->header,
+ HEADER_BRANCH_STACK);
+
+ /*
+ * if branch mode set by user via -b or --branch-stack
+ * or not forced off by user (-no-branch-stack) user and present
+ * in the file then we set branch mode
+ */
+ if (sort__branch_mode || (sort__branch_mode == -1 && has_br_stack)) {
+ sort__branch_mode = true;
if (use_browser)
fprintf(stderr, "Warning: TUI interface not supported"
" in branch mode\n");
@@ -657,13 +669,13 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
}
if (symbol__init() < 0)
- return -1;
+ goto error;
setup_sorting(report_usage, options);
if (parent_pattern != default_parent_pattern) {
if (sort_dimension__add("parent") < 0)
- return -1;
+ goto error;
/*
* Only show the parent fields if we explicitly
@@ -685,5 +697,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
- return __cmd_report(&report);
+ ret = __cmd_report(&report);
+error:
+ perf_session__delete(session);
+ return ret;
}
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index c851495..c22491e 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1023,6 +1023,12 @@ static int write_cpuid(int fd, struct perf_header *h __used,
return do_write_string(fd, buffer);
}
+static int write_branch_stack(int fd __used, struct perf_header *h __used,
+ struct perf_evlist *evlist __used)
+{
+ return 0;
+}
+
static void print_hostname(struct perf_header *ph, int fd, FILE *fp)
{
char *str = do_read_string(fd, ph);
@@ -1315,6 +1321,12 @@ static void print_cpuid(struct perf_header *ph, int fd, FILE *fp)
free(str);
}
+static void print_branch_stack(struct perf_header *ph __used, int fd __used,
+ FILE *fp)
+{
+ fprintf(fp, "# contains samples with branch stacks\n");
+}
+
static int __event_process_build_id(struct build_id_event *bev,
char *filename,
struct perf_session *session)
@@ -1519,6 +1531,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPA(HEADER_CMDLINE, cmdline),
FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology),
FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology),
+ FEAT_OPA(HEADER_BRANCH_STACK, branch_stack),
};
struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index e68f617..21a6be0 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -27,7 +27,7 @@ enum {
HEADER_EVENT_DESC,
HEADER_CPU_TOPOLOGY,
HEADER_NUMA_TOPOLOGY,
-
+ HEADER_BRANCH_STACK,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 2739ed1..69d50c0 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -8,7 +8,7 @@ const char default_sort_order[] = "comm,dso,symbol";
const char *sort_order = default_sort_order;
int sort__need_collapse = 0;
int sort__has_parent = 0;
-bool sort__branch_mode;
+bool sort__branch_mode = -1; /* -1 = means not set */
enum sort_type sort__first_dimension;
^ permalink raw reply related [flat|nested] 37+ messages in thread* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-02-24 9:40 [PATCH] perf report: auto-detect branch stack sampling mode Stephane Eranian @ 2012-02-24 15:24 ` David Ahern 2012-02-24 15:28 ` Stephane Eranian 2012-03-05 15:47 ` Ingo Molnar 1 sibling, 1 reply; 37+ messages in thread From: David Ahern @ 2012-02-24 15:24 UTC (permalink / raw) To: Stephane Eranian Cc: linux-kernel, acme, peterz, mingo, ravitillo, khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi On 2/24/12 2:40 AM, Stephane Eranian wrote: > > This patch adds auto-detection of samples with taken branch stacks. > The auto-detection avoids having to specify the -b or --branch-stack > option on the cmdline. > > The patch adds a new feature bit HEADER_BRANCH_STACK to mark the > presence of branch stacks in samples. > > You can now do: > $ perf record -b any noploop 2 > $ perf report > # Events: 8K cycles > # > # Overhead Command Source Shared Object Source Symbol Target Shared Object Target Symbol > # ........ ....... .................... ................... .................... .................. > # > 91.56% noploop noploop [.] noploop noploop [.] noploop > 0.42% noploop [kernel.kallsyms] [k] __lock_acquire [kernel.kallsyms] [k] __lock_acquire > > > To force regular reporting based on the instruction address: > $ perf report --no-branch-stack > # > # Events: 2K cycles > # > # Overhead Command Shared Object Symbol > # ........ ....... ................. ............................... > # > 92.03% noploop noploop [.] noploop > 1.00% noploop [kernel.kallsyms] [k] lock_acquire > > > Signed-off-by: Stephane Eranian<eranian@google.com> > --- > > diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c > index 1c49d4e..5e833a2 100644 > --- a/tools/perf/builtin-record.c > +++ b/tools/perf/builtin-record.c > @@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) > if (!have_tracepoints(&evsel_list->entries)) > perf_header__clear_feat(&session->header, HEADER_TRACE_INFO); > > + if (!rec->opts.branch_stack) > + perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); branch tracing is user requested on, so shouldn't feature default off and only be enabled when requested? David > + > if (!rec->file_new) { > err = perf_session__read_header(session, output); > if (err< 0) > diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c > index 528789f..edd4289 100644 > --- a/tools/perf/builtin-report.c > +++ b/tools/perf/builtin-report.c > @@ -306,21 +306,14 @@ static int __cmd_report(struct perf_report *rep) > { > int ret = -EINVAL; > u64 nr_samples; > - struct perf_session *session; > struct perf_evsel *pos; > + struct perf_session *session = rep->session; > struct map *kernel_map; > struct kmap *kernel_kmap; > const char *help = "For a higher level overview, try: perf report --sort comm,dso"; > > signal(SIGINT, sig_handler); > > - session = perf_session__new(rep->input_name, O_RDONLY, > - rep->force, false,&rep->tool); > - if (session == NULL) > - return -ENOMEM; > - > - rep->session = session; > - > if (rep->cpu_list) { > ret = perf_session__cpu_bitmap(session, rep->cpu_list, > rep->cpu_bitmap); > @@ -489,7 +482,10 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset) > > int cmd_report(int argc, const char **argv, const char *prefix __used) > { > + struct perf_session *session; > struct stat st; > + bool has_br_stack; > + int ret = -1; > char callchain_default_opt[] = "fractal,0.5,callee"; > const char * const report_usage[] = { > "perf report [<options>]", > @@ -600,7 +596,23 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) > report.input_name = "perf.data"; > } > > - if (sort__branch_mode) { > + session = perf_session__new(report.input_name, O_RDONLY, > + report.force, false,&report.tool); > + if (session == NULL) > + return -ENOMEM; > + > + report.session = session; > + > + has_br_stack = perf_header__has_feat(&session->header, > + HEADER_BRANCH_STACK); > + > + /* > + * if branch mode set by user via -b or --branch-stack > + * or not forced off by user (-no-branch-stack) user and present > + * in the file then we set branch mode > + */ > + if (sort__branch_mode || (sort__branch_mode == -1&& has_br_stack)) { > + sort__branch_mode = true; > if (use_browser) > fprintf(stderr, "Warning: TUI interface not supported" > " in branch mode\n"); > @@ -657,13 +669,13 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) > } > > if (symbol__init()< 0) > - return -1; > + goto error; > > setup_sorting(report_usage, options); > > if (parent_pattern != default_parent_pattern) { > if (sort_dimension__add("parent")< 0) > - return -1; > + goto error; > > /* > * Only show the parent fields if we explicitly > @@ -685,5 +697,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) > sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); > sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); > > - return __cmd_report(&report); > + ret = __cmd_report(&report); > +error: > + perf_session__delete(session); > + return ret; > } > diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c > index c851495..c22491e 100644 > --- a/tools/perf/util/header.c > +++ b/tools/perf/util/header.c > @@ -1023,6 +1023,12 @@ static int write_cpuid(int fd, struct perf_header *h __used, > return do_write_string(fd, buffer); > } > > +static int write_branch_stack(int fd __used, struct perf_header *h __used, > + struct perf_evlist *evlist __used) > +{ > + return 0; > +} > + > static void print_hostname(struct perf_header *ph, int fd, FILE *fp) > { > char *str = do_read_string(fd, ph); > @@ -1315,6 +1321,12 @@ static void print_cpuid(struct perf_header *ph, int fd, FILE *fp) > free(str); > } > > +static void print_branch_stack(struct perf_header *ph __used, int fd __used, > + FILE *fp) > +{ > + fprintf(fp, "# contains samples with branch stacks\n"); > +} > + > static int __event_process_build_id(struct build_id_event *bev, > char *filename, > struct perf_session *session) > @@ -1519,6 +1531,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { > FEAT_OPA(HEADER_CMDLINE, cmdline), > FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), > FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), > + FEAT_OPA(HEADER_BRANCH_STACK, branch_stack), > }; > > struct header_print_data { > diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h > index e68f617..21a6be0 100644 > --- a/tools/perf/util/header.h > +++ b/tools/perf/util/header.h > @@ -27,7 +27,7 @@ enum { > HEADER_EVENT_DESC, > HEADER_CPU_TOPOLOGY, > HEADER_NUMA_TOPOLOGY, > - > + HEADER_BRANCH_STACK, > HEADER_LAST_FEATURE, > HEADER_FEAT_BITS = 256, > }; > diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c > index 2739ed1..69d50c0 100644 > --- a/tools/perf/util/sort.c > +++ b/tools/perf/util/sort.c > @@ -8,7 +8,7 @@ const char default_sort_order[] = "comm,dso,symbol"; > const char *sort_order = default_sort_order; > int sort__need_collapse = 0; > int sort__has_parent = 0; > -bool sort__branch_mode; > +bool sort__branch_mode = -1; /* -1 = means not set */ > > enum sort_type sort__first_dimension; > ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-02-24 15:24 ` David Ahern @ 2012-02-24 15:28 ` Stephane Eranian 2012-02-24 15:31 ` David Ahern 0 siblings, 1 reply; 37+ messages in thread From: Stephane Eranian @ 2012-02-24 15:28 UTC (permalink / raw) To: David Ahern Cc: linux-kernel, acme, peterz, mingo, ravitillo, khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi On Fri, Feb 24, 2012 at 4:24 PM, David Ahern <dsahern@gmail.com> wrote: > On 2/24/12 2:40 AM, Stephane Eranian wrote: >> >> >> This patch adds auto-detection of samples with taken branch stacks. >> The auto-detection avoids having to specify the -b or --branch-stack >> option on the cmdline. >> >> The patch adds a new feature bit HEADER_BRANCH_STACK to mark the >> presence of branch stacks in samples. >> >> You can now do: >> $ perf record -b any noploop 2 >> $ perf report >> # Events: 8K cycles >> # >> # Overhead Command Source Shared Object Source Symbol Target >> Shared Object Target Symbol >> # ........ ....... .................... ................... >> .................... .................. >> # >> 91.56% noploop noploop [.] noploop >> noploop [.] noploop >> 0.42% noploop [kernel.kallsyms] [k] __lock_acquire >> [kernel.kallsyms] [k] __lock_acquire >> >> >> To force regular reporting based on the instruction address: >> $ perf report --no-branch-stack >> # >> # Events: 2K cycles >> # >> # Overhead Command Shared Object Symbol >> # ........ ....... ................. ............................... >> # >> 92.03% noploop noploop [.] noploop >> 1.00% noploop [kernel.kallsyms] [k] lock_acquire >> >> >> Signed-off-by: Stephane Eranian<eranian@google.com> >> --- >> >> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c >> index 1c49d4e..5e833a2 100644 >> --- a/tools/perf/builtin-record.c >> +++ b/tools/perf/builtin-record.c >> @@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int >> argc, const char **argv) >> if (!have_tracepoints(&evsel_list->entries)) >> perf_header__clear_feat(&session->header, >> HEADER_TRACE_INFO); >> >> + if (!rec->opts.branch_stack) >> + perf_header__clear_feat(&session->header, >> HEADER_BRANCH_STACK); > > > branch tracing is user requested on, so shouldn't feature default off and > only be enabled when requested? > Well, what Ingo was suggesting is that perf report auto-detects whether or not branch mode is necessary by looking at the perf.data file. Most likely if you've recorded with -b, you are interested in a branch mode view rather that the instruction view (default). So all this does is elimintate the need to pass -b to perf report to enable branch mode. > David > > >> + >> if (!rec->file_new) { >> err = perf_session__read_header(session, output); >> if (err< 0) >> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c >> index 528789f..edd4289 100644 >> --- a/tools/perf/builtin-report.c >> +++ b/tools/perf/builtin-report.c >> @@ -306,21 +306,14 @@ static int __cmd_report(struct perf_report *rep) >> { >> int ret = -EINVAL; >> u64 nr_samples; >> - struct perf_session *session; >> struct perf_evsel *pos; >> + struct perf_session *session = rep->session; >> struct map *kernel_map; >> struct kmap *kernel_kmap; >> const char *help = "For a higher level overview, try: perf report >> --sort comm,dso"; >> >> signal(SIGINT, sig_handler); >> >> - session = perf_session__new(rep->input_name, O_RDONLY, >> - rep->force, false,&rep->tool); >> >> - if (session == NULL) >> - return -ENOMEM; >> - >> - rep->session = session; >> - >> if (rep->cpu_list) { >> ret = perf_session__cpu_bitmap(session, rep->cpu_list, >> rep->cpu_bitmap); >> @@ -489,7 +482,10 @@ parse_callchain_opt(const struct option *opt, const >> char *arg, int unset) >> >> int cmd_report(int argc, const char **argv, const char *prefix __used) >> { >> + struct perf_session *session; >> struct stat st; >> + bool has_br_stack; >> + int ret = -1; >> char callchain_default_opt[] = "fractal,0.5,callee"; >> const char * const report_usage[] = { >> "perf report [<options>]", >> @@ -600,7 +596,23 @@ int cmd_report(int argc, const char **argv, const >> char *prefix __used) >> report.input_name = "perf.data"; >> } >> >> - if (sort__branch_mode) { >> + session = perf_session__new(report.input_name, O_RDONLY, >> + report.force, false,&report.tool); >> + if (session == NULL) >> + return -ENOMEM; >> + >> + report.session = session; >> + >> + has_br_stack = perf_header__has_feat(&session->header, >> + HEADER_BRANCH_STACK); >> + >> + /* >> + * if branch mode set by user via -b or --branch-stack >> + * or not forced off by user (-no-branch-stack) user and present >> + * in the file then we set branch mode >> + */ >> + if (sort__branch_mode || (sort__branch_mode == -1&& >> has_br_stack)) { >> >> + sort__branch_mode = true; >> if (use_browser) >> fprintf(stderr, "Warning: TUI interface not >> supported" >> " in branch mode\n"); >> @@ -657,13 +669,13 @@ int cmd_report(int argc, const char **argv, const >> char *prefix __used) >> } >> >> if (symbol__init()< 0) >> - return -1; >> + goto error; >> >> setup_sorting(report_usage, options); >> >> if (parent_pattern != default_parent_pattern) { >> if (sort_dimension__add("parent")< 0) >> - return -1; >> + goto error; >> >> /* >> * Only show the parent fields if we explicitly >> @@ -685,5 +697,8 @@ int cmd_report(int argc, const char **argv, const char >> *prefix __used) >> sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", >> stdout); >> sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", >> stdout); >> >> - return __cmd_report(&report); >> + ret = __cmd_report(&report); >> +error: >> + perf_session__delete(session); >> + return ret; >> } >> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c >> index c851495..c22491e 100644 >> --- a/tools/perf/util/header.c >> +++ b/tools/perf/util/header.c >> @@ -1023,6 +1023,12 @@ static int write_cpuid(int fd, struct perf_header >> *h __used, >> return do_write_string(fd, buffer); >> } >> >> +static int write_branch_stack(int fd __used, struct perf_header *h >> __used, >> + struct perf_evlist *evlist __used) >> +{ >> + return 0; >> +} >> + >> static void print_hostname(struct perf_header *ph, int fd, FILE *fp) >> { >> char *str = do_read_string(fd, ph); >> @@ -1315,6 +1321,12 @@ static void print_cpuid(struct perf_header *ph, int >> fd, FILE *fp) >> free(str); >> } >> >> +static void print_branch_stack(struct perf_header *ph __used, int fd >> __used, >> + FILE *fp) >> +{ >> + fprintf(fp, "# contains samples with branch stacks\n"); >> +} >> + >> static int __event_process_build_id(struct build_id_event *bev, >> char *filename, >> struct perf_session *session) >> @@ -1519,6 +1531,7 @@ static const struct feature_ops >> feat_ops[HEADER_LAST_FEATURE] = { >> FEAT_OPA(HEADER_CMDLINE, cmdline), >> FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), >> FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), >> + FEAT_OPA(HEADER_BRANCH_STACK, branch_stack), >> }; >> >> struct header_print_data { >> diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h >> index e68f617..21a6be0 100644 >> --- a/tools/perf/util/header.h >> +++ b/tools/perf/util/header.h >> @@ -27,7 +27,7 @@ enum { >> HEADER_EVENT_DESC, >> HEADER_CPU_TOPOLOGY, >> HEADER_NUMA_TOPOLOGY, >> - >> + HEADER_BRANCH_STACK, >> HEADER_LAST_FEATURE, >> HEADER_FEAT_BITS = 256, >> }; >> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c >> index 2739ed1..69d50c0 100644 >> --- a/tools/perf/util/sort.c >> +++ b/tools/perf/util/sort.c >> @@ -8,7 +8,7 @@ const char default_sort_order[] = "comm,dso,symbol"; >> const char *sort_order = default_sort_order; >> int sort__need_collapse = 0; >> int sort__has_parent = 0; >> -bool sort__branch_mode; >> +bool sort__branch_mode = -1; /* -1 = means not set */ >> >> enum sort_type sort__first_dimension; >> > ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-02-24 15:28 ` Stephane Eranian @ 2012-02-24 15:31 ` David Ahern 2012-02-24 15:40 ` Stephane Eranian 0 siblings, 1 reply; 37+ messages in thread From: David Ahern @ 2012-02-24 15:31 UTC (permalink / raw) To: Stephane Eranian Cc: linux-kernel, acme, peterz, mingo, ravitillo, khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi On 2/24/12 8:28 AM, Stephane Eranian wrote: > On Fri, Feb 24, 2012 at 4:24 PM, David Ahern<dsahern@gmail.com> wrote: >> On 2/24/12 2:40 AM, Stephane Eranian wrote: >>> >>> >>> This patch adds auto-detection of samples with taken branch stacks. >>> The auto-detection avoids having to specify the -b or --branch-stack >>> option on the cmdline. >>> >>> The patch adds a new feature bit HEADER_BRANCH_STACK to mark the >>> presence of branch stacks in samples. >>> >>> You can now do: >>> $ perf record -b any noploop 2 >>> $ perf report >>> # Events: 8K cycles >>> # >>> # Overhead Command Source Shared Object Source Symbol Target >>> Shared Object Target Symbol >>> # ........ ....... .................... ................... >>> .................... .................. >>> # >>> 91.56% noploop noploop [.] noploop >>> noploop [.] noploop >>> 0.42% noploop [kernel.kallsyms] [k] __lock_acquire >>> [kernel.kallsyms] [k] __lock_acquire >>> >>> >>> To force regular reporting based on the instruction address: >>> $ perf report --no-branch-stack >>> # >>> # Events: 2K cycles >>> # >>> # Overhead Command Shared Object Symbol >>> # ........ ....... ................. ............................... >>> # >>> 92.03% noploop noploop [.] noploop >>> 1.00% noploop [kernel.kallsyms] [k] lock_acquire >>> >>> >>> Signed-off-by: Stephane Eranian<eranian@google.com> >>> --- >>> >>> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c >>> index 1c49d4e..5e833a2 100644 >>> --- a/tools/perf/builtin-record.c >>> +++ b/tools/perf/builtin-record.c >>> @@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int >>> argc, const char **argv) >>> if (!have_tracepoints(&evsel_list->entries)) >>> perf_header__clear_feat(&session->header, >>> HEADER_TRACE_INFO); >>> >>> + if (!rec->opts.branch_stack) >>> + perf_header__clear_feat(&session->header, >>> HEADER_BRANCH_STACK); >> >> >> branch tracing is user requested on, so shouldn't feature default off and >> only be enabled when requested? >> > Well, what Ingo was suggesting is that perf report auto-detects whether or > not branch mode is necessary by looking at the perf.data file. Most likely > if you've recorded with -b, you are interested in a branch mode view rather > that the instruction view (default). So all this does is elimintate the need > to pass -b to perf report to enable branch mode. Right. My point is that HEADER_BRANCH_STACK feature should be off by default and only enabled by builtin-record when branch stack is requested. You have the reverse -- enabled by default and off in record if not requested. David ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-02-24 15:31 ` David Ahern @ 2012-02-24 15:40 ` Stephane Eranian 2012-02-24 15:49 ` David Ahern 0 siblings, 1 reply; 37+ messages in thread From: Stephane Eranian @ 2012-02-24 15:40 UTC (permalink / raw) To: David Ahern Cc: linux-kernel, acme, peterz, mingo, ravitillo, khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi On Fri, Feb 24, 2012 at 4:31 PM, David Ahern <dsahern@gmail.com> wrote: > On 2/24/12 8:28 AM, Stephane Eranian wrote: >> >> On Fri, Feb 24, 2012 at 4:24 PM, David Ahern<dsahern@gmail.com> wrote: >>> >>> On 2/24/12 2:40 AM, Stephane Eranian wrote: >>>> >>>> >>>> >>>> This patch adds auto-detection of samples with taken branch stacks. >>>> The auto-detection avoids having to specify the -b or --branch-stack >>>> option on the cmdline. >>>> >>>> The patch adds a new feature bit HEADER_BRANCH_STACK to mark the >>>> presence of branch stacks in samples. >>>> >>>> You can now do: >>>> $ perf record -b any noploop 2 >>>> $ perf report >>>> # Events: 8K cycles >>>> # >>>> # Overhead Command Source Shared Object Source Symbol Target >>>> Shared Object Target Symbol >>>> # ........ ....... .................... ................... >>>> .................... .................. >>>> # >>>> 91.56% noploop noploop [.] noploop >>>> noploop [.] noploop >>>> 0.42% noploop [kernel.kallsyms] [k] __lock_acquire >>>> [kernel.kallsyms] [k] __lock_acquire >>>> >>>> >>>> To force regular reporting based on the instruction address: >>>> $ perf report --no-branch-stack >>>> # >>>> # Events: 2K cycles >>>> # >>>> # Overhead Command Shared Object Symbol >>>> # ........ ....... ................. ............................... >>>> # >>>> 92.03% noploop noploop [.] noploop >>>> 1.00% noploop [kernel.kallsyms] [k] lock_acquire >>>> >>>> >>>> Signed-off-by: Stephane Eranian<eranian@google.com> >>>> --- >>>> >>>> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c >>>> index 1c49d4e..5e833a2 100644 >>>> --- a/tools/perf/builtin-record.c >>>> +++ b/tools/perf/builtin-record.c >>>> @@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int >>>> argc, const char **argv) >>>> if (!have_tracepoints(&evsel_list->entries)) >>>> perf_header__clear_feat(&session->header, >>>> HEADER_TRACE_INFO); >>>> >>>> + if (!rec->opts.branch_stack) >>>> + perf_header__clear_feat(&session->header, >>>> HEADER_BRANCH_STACK); >>> >>> >>> >>> branch tracing is user requested on, so shouldn't feature default off and >>> only be enabled when requested? >>> >> Well, what Ingo was suggesting is that perf report auto-detects whether or >> not branch mode is necessary by looking at the perf.data file. Most likely >> if you've recorded with -b, you are interested in a branch mode view >> rather >> that the instruction view (default). So all this does is elimintate the >> need >> to pass -b to perf report to enable branch mode. > > > Right. My point is that HEADER_BRANCH_STACK feature should be off by default > and only enabled by builtin-record when branch stack is requested. You have > the reverse -- enabled by default and off in record if not requested. > No, I don't. Read the code carefully. The for loop sets all known feature bits. Then, the ones not necessary or unused are turned off individually. > David > ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-02-24 15:40 ` Stephane Eranian @ 2012-02-24 15:49 ` David Ahern 2012-02-24 15:51 ` Stephane Eranian 0 siblings, 1 reply; 37+ messages in thread From: David Ahern @ 2012-02-24 15:49 UTC (permalink / raw) To: Stephane Eranian Cc: linux-kernel, acme, peterz, mingo, ravitillo, khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi On 2/24/12 8:40 AM, Stephane Eranian wrote: > No, I don't. Read the code carefully. The for loop sets all known feature bits. > Then, the ones not necessary or unused are turned off individually. Ok, I see now. The __set_feat loop is earlier in builtin-record.c, not the util code. David ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-02-24 15:49 ` David Ahern @ 2012-02-24 15:51 ` Stephane Eranian 2012-03-02 17:47 ` Stephane Eranian 0 siblings, 1 reply; 37+ messages in thread From: Stephane Eranian @ 2012-02-24 15:51 UTC (permalink / raw) To: David Ahern Cc: linux-kernel, acme, peterz, mingo, ravitillo, khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi On Fri, Feb 24, 2012 at 4:49 PM, David Ahern <dsahern@gmail.com> wrote: > On 2/24/12 8:40 AM, Stephane Eranian wrote: >> >> No, I don't. Read the code carefully. The for loop sets all known feature >> bits. >> Then, the ones not necessary or unused are turned off individually. > > > Ok, I see now. The __set_feat loop is earlier in builtin-record.c, not the > util code. > Yeah. Arnaldo simplified the original code in builtin-record.c to have the loop instead of individual set_feat(). > David ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-02-24 15:51 ` Stephane Eranian @ 2012-03-02 17:47 ` Stephane Eranian 2012-03-02 19:08 ` Arnaldo Carvalho de Melo 0 siblings, 1 reply; 37+ messages in thread From: Stephane Eranian @ 2012-03-02 17:47 UTC (permalink / raw) To: David Ahern Cc: linux-kernel, acme, peterz, mingo, ravitillo, khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi Arnaldo, What do you think about this patch? On Fri, Feb 24, 2012 at 4:51 PM, Stephane Eranian <eranian@google.com> wrote: > On Fri, Feb 24, 2012 at 4:49 PM, David Ahern <dsahern@gmail.com> wrote: >> On 2/24/12 8:40 AM, Stephane Eranian wrote: >>> >>> No, I don't. Read the code carefully. The for loop sets all known feature >>> bits. >>> Then, the ones not necessary or unused are turned off individually. >> >> >> Ok, I see now. The __set_feat loop is earlier in builtin-record.c, not the >> util code. >> > Yeah. Arnaldo simplified the original code in builtin-record.c to have the loop > instead of individual set_feat(). > >> David ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-03-02 17:47 ` Stephane Eranian @ 2012-03-02 19:08 ` Arnaldo Carvalho de Melo 2012-03-03 19:43 ` Arnaldo Carvalho de Melo 0 siblings, 1 reply; 37+ messages in thread From: Arnaldo Carvalho de Melo @ 2012-03-02 19:08 UTC (permalink / raw) To: Stephane Eranian Cc: David Ahern, linux-kernel, peterz, mingo, ravitillo, khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi Em Fri, Mar 02, 2012 at 06:47:38PM +0100, Stephane Eranian escreveu: > Arnaldo, > > What do you think about this patch? I'm processing perf/core patches today, will get to this one and merge or let you know objections, - Arnaldo > > On Fri, Feb 24, 2012 at 4:51 PM, Stephane Eranian <eranian@google.com> wrote: > > On Fri, Feb 24, 2012 at 4:49 PM, David Ahern <dsahern@gmail.com> wrote: > >> On 2/24/12 8:40 AM, Stephane Eranian wrote: > >>> > >>> No, I don't. Read the code carefully. The for loop sets all known feature > >>> bits. > >>> Then, the ones not necessary or unused are turned off individually. > >> > >> > >> Ok, I see now. The __set_feat loop is earlier in builtin-record.c, not the > >> util code. > >> > > Yeah. Arnaldo simplified the original code in builtin-record.c to have the loop > > instead of individual set_feat(). > > > >> David ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-03-02 19:08 ` Arnaldo Carvalho de Melo @ 2012-03-03 19:43 ` Arnaldo Carvalho de Melo 2012-03-05 10:49 ` Ingo Molnar 0 siblings, 1 reply; 37+ messages in thread From: Arnaldo Carvalho de Melo @ 2012-03-03 19:43 UTC (permalink / raw) To: Ingo Molnar, Stephane Eranian Cc: David Ahern, linux-kernel, peterz, ravitillo, khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi Em Fri, Mar 02, 2012 at 04:08:22PM -0300, Arnaldo Carvalho de Melo escreveu: > Em Fri, Mar 02, 2012 at 06:47:38PM +0100, Stephane Eranian escreveu: > > Arnaldo, > > > > What do you think about this patch? > > I'm processing perf/core patches today, will get to this one and merge > or let you know objections, Ingo, What is the status of merging http://git.kernel.org/?p=linux/kernel/git/peterz/queue.git;a=summary ? After that I can try merging Stephane's autho-detect patch, - Arnaldo > > On Fri, Feb 24, 2012 at 4:51 PM, Stephane Eranian <eranian@google.com> wrote: > > > On Fri, Feb 24, 2012 at 4:49 PM, David Ahern <dsahern@gmail.com> wrote: > > >> On 2/24/12 8:40 AM, Stephane Eranian wrote: > > >>> > > >>> No, I don't. Read the code carefully. The for loop sets all known feature > > >>> bits. > > >>> Then, the ones not necessary or unused are turned off individually. > > >> > > >> > > >> Ok, I see now. The __set_feat loop is earlier in builtin-record.c, not the > > >> util code. > > >> > > > Yeah. Arnaldo simplified the original code in builtin-record.c to have the loop > > > instead of individual set_feat(). > > > > > >> David ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-03-03 19:43 ` Arnaldo Carvalho de Melo @ 2012-03-05 10:49 ` Ingo Molnar 2012-03-05 11:11 ` Peter Zijlstra 0 siblings, 1 reply; 37+ messages in thread From: Ingo Molnar @ 2012-03-05 10:49 UTC (permalink / raw) To: Arnaldo Carvalho de Melo Cc: Stephane Eranian, David Ahern, linux-kernel, peterz, ravitillo, khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi * Arnaldo Carvalho de Melo <acme@redhat.com> wrote: > Em Fri, Mar 02, 2012 at 04:08:22PM -0300, Arnaldo Carvalho de Melo escreveu: > > Em Fri, Mar 02, 2012 at 06:47:38PM +0100, Stephane Eranian escreveu: > > > Arnaldo, > > > > > > What do you think about this patch? > > > > I'm processing perf/core patches today, will get to this one and merge > > or let you know objections, > > Ingo, > > What is the status of merging > http://git.kernel.org/?p=linux/kernel/git/peterz/queue.git;a=summary ? > > After that I can try merging Stephane's autho-detect patch, I guess I'll get this from Peter soonish? I think it would be nice to have the tooling patches in the same queue as well, so that I can try it all out as one package before pushing it. Thanks, Ingo ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-03-05 10:49 ` Ingo Molnar @ 2012-03-05 11:11 ` Peter Zijlstra 0 siblings, 0 replies; 37+ messages in thread From: Peter Zijlstra @ 2012-03-05 11:11 UTC (permalink / raw) To: Ingo Molnar Cc: Arnaldo Carvalho de Melo, Stephane Eranian, David Ahern, linux-kernel, ravitillo, khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi On Mon, 2012-03-05 at 11:49 +0100, Ingo Molnar wrote: > * Arnaldo Carvalho de Melo <acme@redhat.com> wrote: > > > Em Fri, Mar 02, 2012 at 04:08:22PM -0300, Arnaldo Carvalho de Melo escreveu: > > > Em Fri, Mar 02, 2012 at 06:47:38PM +0100, Stephane Eranian escreveu: > > > > Arnaldo, > > > > > > > > What do you think about this patch? > > > > > > I'm processing perf/core patches today, will get to this one and merge > > > or let you know objections, > > > > Ingo, > > > > What is the status of merging > > http://git.kernel.org/?p=linux/kernel/git/peterz/queue.git;a=summary ? > > > > After that I can try merging Stephane's autho-detect patch, > > I guess I'll get this from Peter soonish? > > I think it would be nice to have the tooling patches in the same > queue as well, so that I can try it all out as one package > before pushing it. Hmm,. I somehow thought you already had it since you complained about that record vs report thing. Lemme resurrect that series then.. :/ ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-02-24 9:40 [PATCH] perf report: auto-detect branch stack sampling mode Stephane Eranian 2012-02-24 15:24 ` David Ahern @ 2012-03-05 15:47 ` Ingo Molnar 2012-03-05 15:50 ` Ingo Molnar ` (2 more replies) 1 sibling, 3 replies; 37+ messages in thread From: Ingo Molnar @ 2012-03-05 15:47 UTC (permalink / raw) To: Stephane Eranian Cc: linux-kernel, acme, peterz, dsahern, ravitillo, khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi, Peter Zijlstra Another usability bug of the new branch sampling feature is that typing the obvious: $ perf record -b ./myprog does not do the obvious thing and default to 'any' but throws a *very* unhelpful generic perf error: usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] ... Really, we should do better than this. Only people who *want* to specify finer LBR filters should be forced to specify them. Thanks, Ingo ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-03-05 15:47 ` Ingo Molnar @ 2012-03-05 15:50 ` Ingo Molnar 2012-03-05 15:56 ` Ingo Molnar 2012-03-05 15:52 ` [PATCH] perf report: auto-detect branch stack sampling mode Stephane Eranian 2012-03-07 12:49 ` Stephane Eranian 2 siblings, 1 reply; 37+ messages in thread From: Ingo Molnar @ 2012-03-05 15:50 UTC (permalink / raw) To: Stephane Eranian Cc: linux-kernel, acme, peterz, dsahern, ravitillo, khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi, Peter Zijlstra * Ingo Molnar <mingo@elte.hu> wrote: > Really, we should do better than this. Only people who *want* > to specify finer LBR filters should be forced to specify them. Another detail is that perf record outputs this message: [ perf record: Captured and wrote 1.699 MB perf.data (~74220 samples) ] The '~74220 samples' should probably say '~74220 branch samples'. (And yes, I realize that it's not just branch samples but that's not a big issue.) Thanks, Ingo ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-03-05 15:50 ` Ingo Molnar @ 2012-03-05 15:56 ` Ingo Molnar 2012-03-05 16:30 ` Peter Zijlstra 0 siblings, 1 reply; 37+ messages in thread From: Ingo Molnar @ 2012-03-05 15:56 UTC (permalink / raw) To: Stephane Eranian Cc: linux-kernel, acme, peterz, dsahern, ravitillo, khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi, Peter Zijlstra A third usability bug is that for some reason a branch sampling 'perf report' defaults to the stdio output on my testsystem - with --no-branch-stack it gives the regular tui. Even specifying --tui explicitly does not restore the TUI. Is there any fundamental reason why the branch stack does not support the TUI? A fourth bug is that regular profiling appears to be busted: aldebaran:> perf record -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.026 MB perf.data (~1120 samples) ] aldebaran:~> perf report Warning: TUI interface not supported in branch mode selected -b but no branch data. Did you call perf record without -b? Guys, why are you sending me these patches? The tooling side is utterly untested and very raw yet. Thanks, Ingo ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-03-05 15:56 ` Ingo Molnar @ 2012-03-05 16:30 ` Peter Zijlstra 2012-03-05 16:32 ` Stephane Eranian 2012-03-05 20:35 ` Arnaldo Carvalho de Melo 0 siblings, 2 replies; 37+ messages in thread From: Peter Zijlstra @ 2012-03-05 16:30 UTC (permalink / raw) To: Ingo Molnar Cc: Stephane Eranian, linux-kernel, acme, dsahern, ravitillo, khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi On Mon, 2012-03-05 at 16:56 +0100, Ingo Molnar wrote: > A third usability bug is that for some reason a branch sampling > 'perf report' defaults to the stdio output on my testsystem - > with --no-branch-stack it gives the regular tui. > > Ah, I would not have noticed that since I still explicitly build my perf without TUI support. That stuff mostly just gets in the way. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-03-05 16:30 ` Peter Zijlstra @ 2012-03-05 16:32 ` Stephane Eranian 2012-03-05 17:20 ` Ingo Molnar 2012-03-05 20:35 ` Arnaldo Carvalho de Melo 1 sibling, 1 reply; 37+ messages in thread From: Stephane Eranian @ 2012-03-05 16:32 UTC (permalink / raw) To: Peter Zijlstra Cc: Ingo Molnar, linux-kernel, acme, dsahern, ravitillo, khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi On Mon, Mar 5, 2012 at 5:30 PM, Peter Zijlstra <peterz@infradead.org> wrote: > On Mon, 2012-03-05 at 16:56 +0100, Ingo Molnar wrote: >> A third usability bug is that for some reason a branch sampling >> 'perf report' defaults to the stdio output on my testsystem - >> with --no-branch-stack it gives the regular tui. >> >> > Ah, I would not have noticed that since I still explicitly build my perf > without TUI support. That stuff mostly just gets in the way. I tend to agree with Peter here. There is no TUI support for branch as of now. I will fix the other two bugs you reported. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-03-05 16:32 ` Stephane Eranian @ 2012-03-05 17:20 ` Ingo Molnar 0 siblings, 0 replies; 37+ messages in thread From: Ingo Molnar @ 2012-03-05 17:20 UTC (permalink / raw) To: Stephane Eranian Cc: Peter Zijlstra, linux-kernel, acme, dsahern, ravitillo, khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi * Stephane Eranian <eranian@google.com> wrote: > On Mon, Mar 5, 2012 at 5:30 PM, Peter Zijlstra <peterz@infradead.org> wrote: > > On Mon, 2012-03-05 at 16:56 +0100, Ingo Molnar wrote: > >> A third usability bug is that for some reason a branch sampling > >> 'perf report' defaults to the stdio output on my testsystem - > >> with --no-branch-stack it gives the regular tui. > >> > >> > > Ah, I would not have noticed that since I still explicitly > > build my perf without TUI support. That stuff mostly just > > gets in the way. > > I tend to agree with Peter here. I use the TUI most of the time (it allows me to navigate and look at annotated output easily - something the --stdio one cannot do), and I know at least two other kernel developers who prefer the TUI, so it's a YMMV thing. > There is no TUI support for branch as of now. Why? What would be needed to add proper TUI support? Thanks, Ingo ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-03-05 16:30 ` Peter Zijlstra 2012-03-05 16:32 ` Stephane Eranian @ 2012-03-05 20:35 ` Arnaldo Carvalho de Melo 2012-03-05 21:43 ` Arun Sharma 1 sibling, 1 reply; 37+ messages in thread From: Arnaldo Carvalho de Melo @ 2012-03-05 20:35 UTC (permalink / raw) To: Peter Zijlstra Cc: Ingo Molnar, Stephane Eranian, linux-kernel, dsahern, ravitillo, khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi Em Mon, Mar 05, 2012 at 05:30:33PM +0100, Peter Zijlstra escreveu: > On Mon, 2012-03-05 at 16:56 +0100, Ingo Molnar wrote: > > A third usability bug is that for some reason a branch sampling > > 'perf report' defaults to the stdio output on my testsystem - > > with --no-branch-stack it gives the regular tui. > Ah, I would not have noticed that since I still explicitly build my perf > without TUI support. That stuff mostly just gets in the way. Do you have any specific complaints about it? There were changes to make it work as the stdio one when navigation keys aren't used, i.e. color scheme, etc. Its not there to be in the way, but to be useful, so any specific complaints I would love to hear from you. - Arnaldo ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-03-05 20:35 ` Arnaldo Carvalho de Melo @ 2012-03-05 21:43 ` Arun Sharma 2012-03-05 22:26 ` Arnaldo Carvalho de Melo 0 siblings, 1 reply; 37+ messages in thread From: Arun Sharma @ 2012-03-05 21:43 UTC (permalink / raw) To: Arnaldo Carvalho de Melo Cc: Peter Zijlstra, Ingo Molnar, Stephane Eranian, linux-kernel, dsahern, ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi [-- Attachment #1: Type: text/plain, Size: 930 bytes --] On 3/5/12 12:35 PM, Arnaldo Carvalho de Melo wrote: > Em Mon, Mar 05, 2012 at 05:30:33PM +0100, Peter Zijlstra escreveu: >> On Mon, 2012-03-05 at 16:56 +0100, Ingo Molnar wrote: >>> A third usability bug is that for some reason a branch sampling >>> 'perf report' defaults to the stdio output on my testsystem - >>> with --no-branch-stack it gives the regular tui. > >> Ah, I would not have noticed that since I still explicitly build my perf >> without TUI support. That stuff mostly just gets in the way. > > Do you have any specific complaints about it? > I think --tui code paths have some bugs that cause it to SIGSEGV, while the --stdio paths don't. I think much of it has to do with how often that particular combination of command line options is used. Here's an example: # perf record -ag -- sleep 3 # perf report -G -s pid --tui # SIGSEGV # perf report -G -s pid --stdio # works fine Details attached. -Arun [-- Attachment #2: perf-debug.txt --] [-- Type: text/plain, Size: 4716 bytes --] Program received signal SIGSEGV, Segmentation fault. [Switching to Thread 139975795275488 (LWP 13155)] symbol__inc_addr_samples (sym=0x91eb90, map=<value optimized out>, evidx=0, addr=120416) at util/annotate.c:73 73 util/annotate.c: No such file or directory. in util/annotate.c (gdb) bt #0 symbol__inc_addr_samples (sym=0x91eb90, map=<value optimized out>, evidx=0, addr=120416) at util/annotate.c:73 #1 0x0000000000410b5d in process_sample_event (tool=<value optimized out>, event=0x7f4ea28e26b0, sample=0x7fffcc1ca8f0, evsel=0x823ea0, machine=0x822290) at builtin-report.c:127 #2 0x00000000004433ca in flush_sample_queue (s=0x822230, tool=0x7fffcc1cc340) at util/session.c:528 #3 0x0000000000444d16 in __perf_session__process_events (session=0x822230, data_offset=<value optimized out>, data_size=<value optimized out>, file_size=<value optimized out>, tool=0x7fffcc1cc340) at util/session.c:1175 #4 0x0000000000445217 in perf_session__process_events (self=0x822230, tool=0x7fffcc1cc340) at util/session.c:1191 #5 0x000000000041015b in cmd_report (argc=0, argv=0x7fffcc1cc830, prefix=<value optimized out>) at builtin-report.c:311 #6 0x00000000004051b9 in handle_internal_command (argc=4, argv=0x7fffcc1cc830) at perf.c:273 #7 0x0000000000405623 in main (argc=4, argv=0x479218) at perf.c:388 (gdb) p /x sym->start $5 = 0xffffffff8100fb74 (gdb) p /x addr $6 = 0x1d660 (gdb) p offset $7 = 2130762476 54 int symbol__inc_addr_samples(struct symbol *sym, struct map *map, 55 int evidx, u64 addr) 56 { 57 unsigned offset; 58 struct annotation *notes; 59 struct sym_hist *h; 60 61 notes = symbol__annotation(sym); 62 if (notes->src == NULL) 63 return -ENOMEM; 64 65 pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr)); 66 67 if (addr >= sym->end) 68 return 0; 69 70 offset = addr - sym->start; 71 h = annotation__histogram(notes, evidx); 72 h->sum++; 73 h->addr[offset]++; <-- potential bad memory reference ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-03-05 21:43 ` Arun Sharma @ 2012-03-05 22:26 ` Arnaldo Carvalho de Melo 2012-03-05 23:35 ` Arun Sharma 0 siblings, 1 reply; 37+ messages in thread From: Arnaldo Carvalho de Melo @ 2012-03-05 22:26 UTC (permalink / raw) To: Arun Sharma Cc: Peter Zijlstra, Ingo Molnar, Stephane Eranian, linux-kernel, dsahern, ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi Em Mon, Mar 05, 2012 at 01:43:23PM -0800, Arun Sharma escreveu: > On 3/5/12 12:35 PM, Arnaldo Carvalho de Melo wrote: > >Em Mon, Mar 05, 2012 at 05:30:33PM +0100, Peter Zijlstra escreveu: > >>On Mon, 2012-03-05 at 16:56 +0100, Ingo Molnar wrote: > >Do you have any specific complaints about it? > > I think --tui code paths have some bugs that cause it to SIGSEGV, > while the --stdio paths don't. I think much of it has to do with how We'll have the same problem with --gtk, i.e. as much as we librarise the backend code, there will be bugs in the frontends, but then we'll fix as in this case. What I was more concerned was what usability problems Peter and Stephane had with the TUI, i.e. any missing feature they like on --stdio, or any annoyance wrt color schemes, key bindings, etc. I made an effort to try and have the look and feel of the TUI to resemble as much as possible the one in --stdio, while allowing usability improvements not possible in the --stdio case, like: . Folding/unfolding callchains . Zooming by DSO, etc . Going to annotate and back really quickly . Initial support for navigation in the annotate window (just callq for now, but jumps, etc are on the plans) . Toggling source code on/off on the annotation window > often that particular combination of command line options is used. > > Here's an example: > > # perf record -ag -- sleep 3 > # perf report -G -s pid --tui # SIGSEGV Ok, now this is a good report, I managed to reproduce and will work on a fix, thanks, - Arnaldo > # perf report -G -s pid --stdio # works fine > > Details attached. > > -Arun > > > > > Program received signal SIGSEGV, Segmentation fault. > [Switching to Thread 139975795275488 (LWP 13155)] > symbol__inc_addr_samples (sym=0x91eb90, map=<value optimized out>, evidx=0, addr=120416) at util/annotate.c:73 > 73 util/annotate.c: No such file or directory. > in util/annotate.c > (gdb) bt > #0 symbol__inc_addr_samples (sym=0x91eb90, map=<value optimized out>, evidx=0, addr=120416) at util/annotate.c:73 > #1 0x0000000000410b5d in process_sample_event (tool=<value optimized out>, event=0x7f4ea28e26b0, sample=0x7fffcc1ca8f0, evsel=0x823ea0, machine=0x822290) at builtin-report.c:127 > #2 0x00000000004433ca in flush_sample_queue (s=0x822230, tool=0x7fffcc1cc340) at util/session.c:528 > #3 0x0000000000444d16 in __perf_session__process_events (session=0x822230, data_offset=<value optimized out>, data_size=<value optimized out>, file_size=<value optimized out>, tool=0x7fffcc1cc340) > at util/session.c:1175 > #4 0x0000000000445217 in perf_session__process_events (self=0x822230, tool=0x7fffcc1cc340) at util/session.c:1191 > #5 0x000000000041015b in cmd_report (argc=0, argv=0x7fffcc1cc830, prefix=<value optimized out>) at builtin-report.c:311 > #6 0x00000000004051b9 in handle_internal_command (argc=4, argv=0x7fffcc1cc830) at perf.c:273 > #7 0x0000000000405623 in main (argc=4, argv=0x479218) at perf.c:388 > > (gdb) p /x sym->start > $5 = 0xffffffff8100fb74 > (gdb) p /x addr > $6 = 0x1d660 > (gdb) p offset > $7 = 2130762476 > > 54 int symbol__inc_addr_samples(struct symbol *sym, struct map *map, > 55 int evidx, u64 addr) > 56 { > 57 unsigned offset; > 58 struct annotation *notes; > 59 struct sym_hist *h; > 60 > 61 notes = symbol__annotation(sym); > 62 if (notes->src == NULL) > 63 return -ENOMEM; > 64 > 65 pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr)); > 66 > 67 if (addr >= sym->end) > 68 return 0; > 69 > 70 offset = addr - sym->start; > 71 h = annotation__histogram(notes, evidx); > 72 h->sum++; > 73 h->addr[offset]++; <-- potential bad memory reference ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-03-05 22:26 ` Arnaldo Carvalho de Melo @ 2012-03-05 23:35 ` Arun Sharma 2012-03-06 3:06 ` Arnaldo Carvalho de Melo 2012-03-06 6:25 ` Ingo Molnar 0 siblings, 2 replies; 37+ messages in thread From: Arun Sharma @ 2012-03-05 23:35 UTC (permalink / raw) To: Arnaldo Carvalho de Melo Cc: Peter Zijlstra, Ingo Molnar, Stephane Eranian, linux-kernel, dsahern, ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi Like you probably figured from my other mail, we deal with deeply nested callchains with unwieldy function names a lot -- thanks to C++ and template programming. --tui's collapsing/expanding functionality is quite useful to navigate that mess. I'm just taking this opportunity to get some attention focused on improving it :) On 3/5/12 2:26 PM, Arnaldo Carvalho de Melo wrote: >> Here's an example: >> >> # perf record -ag -- sleep 3 >> # perf report -G -s pid --tui # SIGSEGV > > Ok, now this is a good report, I managed to reproduce and will work on a > fix, thanks, Something like this seems to do it for me. offset = addr - sym->start; + len = sym->end - sym->start; + if (offset >= len) + return 0; + The other problem area seems to be callchains when using -p regexp -x options. I'll try to summarize problems there in a separate thread. -Arun ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-03-05 23:35 ` Arun Sharma @ 2012-03-06 3:06 ` Arnaldo Carvalho de Melo 2012-03-06 6:27 ` Ingo Molnar 2012-03-06 6:25 ` Ingo Molnar 1 sibling, 1 reply; 37+ messages in thread From: Arnaldo Carvalho de Melo @ 2012-03-06 3:06 UTC (permalink / raw) To: Arun Sharma Cc: Peter Zijlstra, Ingo Molnar, Stephane Eranian, linux-kernel, dsahern, ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi Em Mon, Mar 05, 2012 at 03:35:35PM -0800, Arun Sharma escreveu: > Like you probably figured from my other mail, we deal with deeply > nested callchains with unwieldy function names a lot -- thanks to C++ > and template programming. --tui's collapsing/expanding functionality > is quite useful to navigate that mess. I'm just taking this > opportunity to get some attention focused on improving it :) Excellent! I can think about other Zoom operations, like zooming into just the entries where some specific function in its callchains, say the one under the cursor, appears, etc. > On 3/5/12 2:26 PM, Arnaldo Carvalho de Melo wrote: > > >>Here's an example: > >> > >># perf record -ag -- sleep 3 > >># perf report -G -s pid --tui # SIGSEGV > > > >Ok, now this is a good report, I managed to reproduce and will work on a > >fix, thanks, > > Something like this seems to do it for me. > > offset = addr - sym->start; > + len = sym->end - sym->start; > + if (offset >= len) > + return 0; > + That is my fault, I should have added a BUG_ON() spitting out a callchain in this case, as that function shouldn't be called if the address is not within its range :-\ > The other problem area seems to be callchains when using -p regexp > -x options. I'll try to summarize problems there in a separate > thread. Please do, Thanks a lot! - Arnaldo ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-03-06 3:06 ` Arnaldo Carvalho de Melo @ 2012-03-06 6:27 ` Ingo Molnar 0 siblings, 0 replies; 37+ messages in thread From: Ingo Molnar @ 2012-03-06 6:27 UTC (permalink / raw) To: Arnaldo Carvalho de Melo Cc: Arun Sharma, Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern, ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi * Arnaldo Carvalho de Melo <acme@redhat.com> wrote: > > Something like this seems to do it for me. > > > > offset = addr - sym->start; > > + len = sym->end - sym->start; > > + if (offset >= len) > > + return 0; > > + > > That is my fault, I should have added a BUG_ON() spitting out > a callchain in this case, as that function shouldn't be called > if the address is not within its range :-\ Btw., I'd suggest not doing a BUG_ON() but some less destructive warning: both symbol table errors and sample stream hickups are common and can lead to essentially arbitrary input data - we shouldn't crash on that. Thanks, Ingo ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-03-05 23:35 ` Arun Sharma 2012-03-06 3:06 ` Arnaldo Carvalho de Melo @ 2012-03-06 6:25 ` Ingo Molnar 2012-03-07 1:57 ` [RFC] perf report: Implement symbol filtering on TUI Namhyung Kim 1 sibling, 1 reply; 37+ messages in thread From: Ingo Molnar @ 2012-03-06 6:25 UTC (permalink / raw) To: Arun Sharma Cc: Arnaldo Carvalho de Melo, Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern, ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi * Arun Sharma <asharma@fb.com> wrote: > Something like this seems to do it for me. > > offset = addr - sym->start; > + len = sym->end - sym->start; > + if (offset >= len) > + return 0; > + It would be nice to not have such inconsistent sym entries to begin with - i.e. to filter in the symbol code, not in the GUI front-end code. > The other problem area seems to be callchains when using -p > regexp -x options. I'll try to summarize problems there in a > separate thread. Btw., I have a text/regex filtering feature request there going beyond the issue of parent filtering, I often would love to be able to filter the sampled function itself: perf report sched or: perf report time or: perf report perf to only see the list of (kernel) functions whose name name matches those patterns. (and skip all other functions) Especially when I want to improve the tail portion of the profile this would be pretty useful. Today I can only do that with --stdio: perf report | grep sched The -S option is too strict, it only allows individual symbols, no filters. Also, I hate typing '-S' ;-) Thanks, Ingo ^ permalink raw reply [flat|nested] 37+ messages in thread
* [RFC] perf report: Implement symbol filtering on TUI 2012-03-06 6:25 ` Ingo Molnar @ 2012-03-07 1:57 ` Namhyung Kim 2012-03-07 6:07 ` Ingo Molnar 2012-03-14 23:11 ` Arun Sharma 0 siblings, 2 replies; 37+ messages in thread From: Namhyung Kim @ 2012-03-07 1:57 UTC (permalink / raw) To: Ingo Molnar Cc: Namhyung Kim, Arun Sharma, Arnaldo Carvalho de Melo, Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern, ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi As Ingo requested, symbol filtering feature was missing on TUI. Add 's' key to get input from user, and do simple filtering by strstr(). To turn filtering off, just enter no name by pressing 's' followed by ENTER. There should be many issues, but I just want to release this to get some feedbacks. Signed-off-by: Namhyung Kim <namhyung.kim@lge.com> --- tools/perf/builtin-report.c | 21 +++++++-- tools/perf/util/hist.c | 31 ++++++++++++++ tools/perf/util/hist.h | 2 + tools/perf/util/ui/browser.h | 2 + tools/perf/util/ui/browsers/hists.c | 17 +++++++- tools/perf/util/ui/keysyms.h | 3 + tools/perf/util/ui/util.c | 78 +++++++++++++++++++++++++++++++++++ 7 files changed, 148 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 25d34d483e49..64878b63f4f8 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -50,6 +50,7 @@ struct perf_report { const char *pretty_printing_style; symbol_filter_t annotate_init; const char *cpu_list; + const char *symbol_filter_str; DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); }; @@ -326,6 +327,11 @@ static int __cmd_report(struct perf_report *rep) } if (use_browser > 0) { + struct perf_evsel *first; + first = list_entry(session->evlist->entries.next, + struct perf_evsel, node); + first->hists.symbol_filter_str = rep->symbol_filter_str; + perf_evlist__tui_browse_hists(session->evlist, help, NULL, NULL, 0); } else @@ -586,11 +592,16 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) } else symbol_conf.exclude_other = false; - /* - * Any (unrecognized) arguments left? - */ - if (argc) - usage_with_options(report_usage, options); + if (argc) { + /* + * Special case: if there's an argument left then assume tha + * it's a symbol filter: + */ + if (argc > 1) + usage_with_options(report_usage, options); + + report.symbol_filter_str = argv[0]; + } sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6f505d1abac7..56f19d86b0cd 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -15,6 +15,7 @@ enum hist_filter { HIST_FILTER__DSO, HIST_FILTER__THREAD, HIST_FILTER__PARENT, + HIST_FILTER__SYMBOL, }; struct callchain_param callchain_param = { @@ -1179,6 +1180,36 @@ void hists__filter_by_thread(struct hists *hists) } } +static bool hists__filter_entry_by_symbol(struct hists *hists, + struct hist_entry *he) +{ + if (hists->symbol_filter_str != NULL && he->ms.sym && + strstr(he->ms.sym->name, hists->symbol_filter_str) == NULL) { + he->filtered |= (1 << HIST_FILTER__SYMBOL); + return true; + } + + return false; +} + +void hists__filter_by_symbol(struct hists *hists) +{ + struct rb_node *nd; + + hists->nr_entries = hists->stats.total_period = 0; + hists->stats.nr_events[PERF_RECORD_SAMPLE] = 0; + hists__reset_col_len(hists); + + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + + if (hists__filter_entry_by_symbol(hists, h)) + continue; + + hists__remove_entry_filter(hists, h, HIST_FILTER__SYMBOL); + } +} + int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip) { return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 48e5acd1e862..020d3477cabc 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -57,6 +57,7 @@ struct hists { const struct thread *thread_filter; const struct dso *dso_filter; const char *uid_filter_str; + const char *symbol_filter_str; pthread_mutex_t lock; struct events_stats stats; u64 event_stream; @@ -96,6 +97,7 @@ int hist_entry__annotate(struct hist_entry *self, size_t privsize); void hists__filter_by_dso(struct hists *hists); void hists__filter_by_thread(struct hists *hists); +void hists__filter_by_symbol(struct hists *hists); u16 hists__col_len(struct hists *self, enum hist_column col); void hists__set_col_len(struct hists *self, enum hist_column col, u16 len); diff --git a/tools/perf/util/ui/browser.h b/tools/perf/util/ui/browser.h index 84d761b730c1..6ee82f60feaf 100644 --- a/tools/perf/util/ui/browser.h +++ b/tools/perf/util/ui/browser.h @@ -49,6 +49,8 @@ int ui_browser__warning(struct ui_browser *browser, int timeout, const char *format, ...); int ui_browser__help_window(struct ui_browser *browser, const char *text); bool ui_browser__dialog_yesno(struct ui_browser *browser, const char *text); +int ui_browser__input_window(const char *title, const char *text, char *input, + const char *exit_msg, int delay_sec); void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence); unsigned int ui_browser__argv_refresh(struct ui_browser *browser); diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index bfba0490c098..6a7c918e7b4c 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c @@ -863,6 +863,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, struct hist_browser *browser = hist_browser__new(self); struct pstack *fstack; int key = -1; + char buf[64]; if (browser == NULL) return -1; @@ -871,6 +872,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (fstack == NULL) goto out; + hists__filter_by_symbol(self); ui_helpline__push(helpline); while (1) { @@ -915,6 +917,18 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, goto zoom_dso; case 't': goto zoom_thread; + case 's': + if (ui_browser__input_window("Symbol to show", + "Please enter the name of symbol you want to see", + buf, "ENTER: OK, ESC: Cancel", + delay_secs * 2) == K_ENTER) { + self->symbol_filter_str = buf; + ui_helpline__fpush("filtered by \"%s\"", + self->symbol_filter_str); + } + hists__filter_by_symbol(self); + hist_browser__reset(browser); + continue; case K_F1: case 'h': case '?': @@ -932,7 +946,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, "C Collapse all callchains\n" "E Expand all callchains\n" "d Zoom into current DSO\n" - "t Zoom into current Thread"); + "t Zoom into current Thread\n" + "s Filter symbol by name"); continue; case K_ENTER: case K_RIGHT: diff --git a/tools/perf/util/ui/keysyms.h b/tools/perf/util/ui/keysyms.h index 3458b1985761..7bc34084d663 100644 --- a/tools/perf/util/ui/keysyms.h +++ b/tools/perf/util/ui/keysyms.h @@ -16,6 +16,9 @@ #define K_TAB '\t' #define K_UNTAB SL_KEY_UNTAB #define K_UP SL_KEY_UP +//#define K_BKSPC SL_KEY_BACKSPACE +#define K_BKSPC 0x7f +#define K_DEL SL_KEY_DELETE /* Not really keys */ #define K_TIMER -1 diff --git a/tools/perf/util/ui/util.c b/tools/perf/util/ui/util.c index 45daa7c41dad..360f43fd5400 100644 --- a/tools/perf/util/ui/util.c +++ b/tools/perf/util/ui/util.c @@ -69,6 +69,84 @@ int ui__popup_menu(int argc, char * const argv[]) return popup_menu__run(&menu); } +int ui_browser__input_window(const char *title, const char *text, char *input, + const char *exit_msg, int delay_secs) +{ + int x, y, len, key; + int max_len = 60, nr_lines = 0; + static char buf[50]; + const char *t; + + t = text; + while (1) { + const char *sep = strchr(t, '\n'); + + if (sep == NULL) + sep = strchr(t, '\0'); + len = sep - t; + if (max_len < len) + max_len = len; + ++nr_lines; + if (*sep == '\0') + break; + t = sep + 1; + } + + max_len += 2; + nr_lines += 8; + y = SLtt_Screen_Rows / 2 - nr_lines / 2; + x = SLtt_Screen_Cols / 2 - max_len / 2; + + SLsmg_set_color(0); + SLsmg_draw_box(y, x++, nr_lines, max_len); + if (title) { + SLsmg_gotorc(y, x + 1); + SLsmg_write_string((char *)title); + } + SLsmg_gotorc(++y, x); + nr_lines -= 7; + max_len -= 2; + SLsmg_write_wrapped_string((unsigned char *)text, y, x, + nr_lines, max_len, 1); + y += nr_lines + 1; + SLsmg_set_color(0); + SLsmg_draw_box(y - 1, x + 1, 3, max_len - 2); + + SLsmg_gotorc(y + 3, x); + SLsmg_write_nstring((char *)exit_msg, max_len); + SLsmg_refresh(); + + x += 2; + len = 0; + key = ui__getch(delay_secs); + while (key != K_TIMER && key != K_ENTER && key != K_ESC) { + if (key == K_BKSPC) { + if (len == 0) + goto next_key; + SLsmg_gotorc(y, x + --len); + SLsmg_write_char(' '); + } else { + buf[len] = key; + SLsmg_gotorc(y, x + len++); + SLsmg_write_char(key); + } + SLsmg_refresh(); + + /* XXX more graceful overflow handling needed */ + if (len == sizeof(buf) - 1) { + ui_helpline__push("maximum size of symbol name reached!"); + key = K_ENTER; + break; + } +next_key: + key = ui__getch(delay_secs); + } + + buf[len] = '\0'; + strncpy(input, buf, len+1); + return key; +} + int ui__question_window(const char *title, const char *text, const char *exit_msg, int delay_secs) { -- 1.7.9 ^ permalink raw reply related [flat|nested] 37+ messages in thread
* Re: [RFC] perf report: Implement symbol filtering on TUI 2012-03-07 1:57 ` [RFC] perf report: Implement symbol filtering on TUI Namhyung Kim @ 2012-03-07 6:07 ` Ingo Molnar 2012-03-07 8:04 ` Namhyung Kim 2012-03-14 23:11 ` Arun Sharma 1 sibling, 1 reply; 37+ messages in thread From: Ingo Molnar @ 2012-03-07 6:07 UTC (permalink / raw) To: Namhyung Kim Cc: Namhyung Kim, Arun Sharma, Arnaldo Carvalho de Melo, Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern, ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi * Namhyung Kim <namhyung.kim@lge.com> wrote: > As Ingo requested, symbol filtering feature was missing on TUI. > Add 's' key to get input from user, and do simple filtering by > strstr(). To turn filtering off, just enter no name by pressing > 's' followed by ENTER. > > There should be many issues, but I just want to release this > to get some feedbacks. I'd love it if in addition to the hotkey, if I typed the obvious sequence: $ perf report sched ... then it would turn into such a filter automagically. Thanks, Ingo ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC] perf report: Implement symbol filtering on TUI 2012-03-07 6:07 ` Ingo Molnar @ 2012-03-07 8:04 ` Namhyung Kim 2012-03-08 10:44 ` Ingo Molnar 0 siblings, 1 reply; 37+ messages in thread From: Namhyung Kim @ 2012-03-07 8:04 UTC (permalink / raw) To: Ingo Molnar Cc: Namhyung Kim, Arun Sharma, Arnaldo Carvalho de Melo, Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern, ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi Hi, Ingo 2012-03-07 3:07 PM, Ingo Molnar wrote: > > * Namhyung Kim <namhyung.kim@lge.com> wrote: > >> As Ingo requested, symbol filtering feature was missing on TUI. >> Add 's' key to get input from user, and do simple filtering by >> strstr(). To turn filtering off, just enter no name by pressing >> 's' followed by ENTER. >> >> There should be many issues, but I just want to release this >> to get some feedbacks. > > I'd love it if in addition to the hotkey, if I typed the obvious > sequence: > > $ perf report sched > > ... then it would turn into such a filter automagically. > Oh, I implemented that already. Please test it! :) Thanks, Namhyung ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC] perf report: Implement symbol filtering on TUI 2012-03-07 8:04 ` Namhyung Kim @ 2012-03-08 10:44 ` Ingo Molnar 2012-03-09 1:53 ` Namhyung Kim 0 siblings, 1 reply; 37+ messages in thread From: Ingo Molnar @ 2012-03-08 10:44 UTC (permalink / raw) To: Namhyung Kim Cc: Namhyung Kim, Arun Sharma, Arnaldo Carvalho de Melo, Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern, ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi * Namhyung Kim <namhyung.kim@lge.com> wrote: > Hi, Ingo > > 2012-03-07 3:07 PM, Ingo Molnar wrote: > > > >* Namhyung Kim <namhyung.kim@lge.com> wrote: > > > >>As Ingo requested, symbol filtering feature was missing on TUI. > >>Add 's' key to get input from user, and do simple filtering by > >>strstr(). To turn filtering off, just enter no name by pressing > >>'s' followed by ENTER. > >> > >>There should be many issues, but I just want to release this > >>to get some feedbacks. > > > >I'd love it if in addition to the hotkey, if I typed the obvious > >sequence: > > > > $ perf report sched > > > >... then it would turn into such a filter automagically. > > > > Oh, I implemented that already. Please test it! :) Cool - I tried it out and it works just as it should! I noticed two details: - "perf report sched | less" does not work as expected - such kinds of features should be GUI-frontend agnostic. - unknown symbols are not matched, and thus they will show up indiscrimnately even though I only want to see them if the filter is something like '0x' or 'unknown'. Anyway, apart from these two details: Tested-by: Ingo Molnar <mingo@elte.hu> Thanks, Ingo ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC] perf report: Implement symbol filtering on TUI 2012-03-08 10:44 ` Ingo Molnar @ 2012-03-09 1:53 ` Namhyung Kim 2012-03-09 7:36 ` Ingo Molnar 0 siblings, 1 reply; 37+ messages in thread From: Namhyung Kim @ 2012-03-09 1:53 UTC (permalink / raw) To: Ingo Molnar Cc: Namhyung Kim, Arun Sharma, Arnaldo Carvalho de Melo, Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern, ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi Hi, 2012-03-08 7:44 PM, Ingo Molnar wrote: > > * Namhyung Kim <namhyung.kim@lge.com> wrote: > >> Hi, Ingo >> >> 2012-03-07 3:07 PM, Ingo Molnar wrote: >>> >>> * Namhyung Kim <namhyung.kim@lge.com> wrote: >>> >>>> As Ingo requested, symbol filtering feature was missing on TUI. >>>> Add 's' key to get input from user, and do simple filtering by >>>> strstr(). To turn filtering off, just enter no name by pressing >>>> 's' followed by ENTER. >>>> >>>> There should be many issues, but I just want to release this >>>> to get some feedbacks. >>> >>> I'd love it if in addition to the hotkey, if I typed the obvious >>> sequence: >>> >>> $ perf report sched >>> >>> ... then it would turn into such a filter automagically. >>> >> >> Oh, I implemented that already. Please test it! :) > > Cool - I tried it out and it works just as it should! > > I noticed two details: > > - "perf report sched | less" does not work as expected - such > kinds of features should be GUI-frontend agnostic. > Will fix. > - unknown symbols are not matched, and thus they will show up > indiscrimnately even though I only want to see them if the > filter is something like '0x' or 'unknown'. > Since they have no symbol. :) In the current implementation, it will actually show you such symbols if you enter '0x' or 'unknown' as a filter unless there're symbols that have those letters in its name. I can think of 3 solutions for this now: 1. Adding a special filter keyword (like 'unknown'). But there's probably some symbols which have those letters. 2. If filter string consists of (hex-) digits only, it will only show hist entries doesn't have symbols, or tries to match based on its ip. 3. Implement zooming-in to "unknown" dso. Maybe it's a different issue, but I think it's good to have and it'll helps this too. What do you guys think? > Anyway, apart from these two details: > > Tested-by: Ingo Molnar <mingo@elte.hu> > Thanks for testing and suggestions. Namhyung ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC] perf report: Implement symbol filtering on TUI 2012-03-09 1:53 ` Namhyung Kim @ 2012-03-09 7:36 ` Ingo Molnar 2012-03-09 8:03 ` Namhyung Kim 0 siblings, 1 reply; 37+ messages in thread From: Ingo Molnar @ 2012-03-09 7:36 UTC (permalink / raw) To: Namhyung Kim Cc: Namhyung Kim, Arun Sharma, Arnaldo Carvalho de Melo, Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern, ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi * Namhyung Kim <namhyung.kim@lge.com> wrote: > > - unknown symbols are not matched, and thus they will show up > > indiscrimnately even though I only want to see them if the > > filter is something like '0x' or 'unknown'. > > > > Since they have no symbol. :) In the current implementation, > it will actually show you such symbols if you enter '0x' or > 'unknown' as a filter unless there're symbols that have those > letters in its name. > > I can think of 3 solutions for this now: > > 1. Adding a special filter keyword (like 'unknown'). But there's > probably some symbols which have those letters. > > 2. If filter string consists of (hex-) digits only, it will only > show hist entries doesn't have symbols, or tries to match based on > its ip. > > 3. Implement zooming-in to "unknown" dso. Maybe it's a different > issue, but I think it's good to have and it'll helps this too. > > What do you guys think? Well, the main problem I had was that they showed up in the 'perf report sched' filtered output and messed it up. I.e. I'm not interested in unknown symbols much, I'm interested in *not* seeing them when I type 'perf report sched'. Thanks, Ingo ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC] perf report: Implement symbol filtering on TUI 2012-03-09 7:36 ` Ingo Molnar @ 2012-03-09 8:03 ` Namhyung Kim 0 siblings, 0 replies; 37+ messages in thread From: Namhyung Kim @ 2012-03-09 8:03 UTC (permalink / raw) To: Ingo Molnar Cc: Namhyung Kim, Arun Sharma, Arnaldo Carvalho de Melo, Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern, ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi Hi, 2012-03-09 4:36 PM, Ingo Molnar wrote: > Well, the main problem I had was that they showed up in the > 'perf report sched' filtered output and messed it up. > > I.e. I'm not interested in unknown symbols much, I'm interested > in *not* seeing them when I type 'perf report sched'. > It should be easy to get rid of unknown symbols from filtered output. So I just wanted how to deal with if someone would need to show (part of) those symbols only. But now I guess it could be handled at dso/comm level. Thanks, Namhyung ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC] perf report: Implement symbol filtering on TUI 2012-03-07 1:57 ` [RFC] perf report: Implement symbol filtering on TUI Namhyung Kim 2012-03-07 6:07 ` Ingo Molnar @ 2012-03-14 23:11 ` Arun Sharma 2012-03-15 0:44 ` Namhyung Kim 1 sibling, 1 reply; 37+ messages in thread From: Arun Sharma @ 2012-03-14 23:11 UTC (permalink / raw) To: Namhyung Kim Cc: Ingo Molnar, Namhyung Kim, Arun Sharma, Arnaldo Carvalho de Melo, Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern, ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi On Wed, Mar 07, 2012 at 10:57:50AM +0900, Namhyung Kim wrote: > As Ingo requested, symbol filtering feature was missing on TUI. > Add 's' key to get input from user, and do simple filtering by > strstr(). To turn filtering off, just enter no name by pressing > 's' followed by ENTER. Why not do this for --stdio as well? perf report foo -g graph,0.5,caller -s inclusive --stdio will print only the callgraph under foo. This works better for me than: perf report -s parent -p ^c$ --stdio -Arun index 94394f3..607b21b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -219,6 +219,8 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, struct hists *hists = &pos->hists; const char *evname = event_name(pos); + hists->symbol_filter_str = rep->symbol_filter_str; + hists__filter_by_symbol(hists); hists__fprintf_nr_sample_events(hists, evname, stdout); hists__fprintf(hists, NULL, false, true, 0, 0, stdout); fprintf(stdout, "\n\n"); ^ permalink raw reply related [flat|nested] 37+ messages in thread
* Re: [RFC] perf report: Implement symbol filtering on TUI 2012-03-14 23:11 ` Arun Sharma @ 2012-03-15 0:44 ` Namhyung Kim 2012-03-15 21:46 ` Arun Sharma 0 siblings, 1 reply; 37+ messages in thread From: Namhyung Kim @ 2012-03-15 0:44 UTC (permalink / raw) To: Arun Sharma Cc: Ingo Molnar, Namhyung Kim, Arnaldo Carvalho de Melo, Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern, ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi Hi, 2012-03-15 8:11 AM, Arun Sharma wrote: > On Wed, Mar 07, 2012 at 10:57:50AM +0900, Namhyung Kim wrote: >> As Ingo requested, symbol filtering feature was missing on TUI. >> Add 's' key to get input from user, and do simple filtering by >> strstr(). To turn filtering off, just enter no name by pressing >> 's' followed by ENTER. > > Why not do this for --stdio as well? > > perf report foo -g graph,0.5,caller -s inclusive --stdio > > will print only the callgraph under foo. > > This works better for me than: > > perf report -s parent -p ^c$ --stdio > > -Arun Thanks for reviewing and sending the patch. However this was already fixed on my new patch set. Please see below: https://lkml.org/lkml/2012/3/13/73 Thanks, Namhyung > > index 94394f3..607b21b 100644 > --- a/tools/perf/builtin-report.c > +++ b/tools/perf/builtin-report.c > @@ -219,6 +219,8 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, > struct hists *hists =&pos->hists; > const char *evname = event_name(pos); > > + hists->symbol_filter_str = rep->symbol_filter_str; > + hists__filter_by_symbol(hists); > hists__fprintf_nr_sample_events(hists, evname, stdout); > hists__fprintf(hists, NULL, false, true, 0, 0, stdout); > fprintf(stdout, "\n\n"); > > ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [RFC] perf report: Implement symbol filtering on TUI 2012-03-15 0:44 ` Namhyung Kim @ 2012-03-15 21:46 ` Arun Sharma 0 siblings, 0 replies; 37+ messages in thread From: Arun Sharma @ 2012-03-15 21:46 UTC (permalink / raw) To: Namhyung Kim Cc: Ingo Molnar, Namhyung Kim, Arnaldo Carvalho de Melo, Peter Zijlstra, Stephane Eranian, linux-kernel, dsahern, ravitillo, khandual, robert.richter, ming.m.lin, vweaver1, andi On 3/14/12 5:44 PM, Namhyung Kim wrote: > Thanks for reviewing and sending the patch. > > However this was already fixed on my new patch set. Please see below: > > https://lkml.org/lkml/2012/3/13/73 > I still get different behaviors with and without my two line patch. Try: perf record -g perf report c -G -g graph,0.5,caller -s inclusive --stdio I don't see where hists__filter_by_symbol() gets called when using --stdio. But let's examine what I think is a more important general issue (not specific to your patch): Here's the behavior my coworkers are requesting. With the test program I mailed earlier with this callgraph: ./perf report -G -g graph,0.5,caller -s pid --stdio 100.00% foo:30804 | --- __libc_start_main | --99.87%-- main | |--50.01%-- a | | | --42.71%-- c | | | |--14.36%-- e | | | | | --7.09%-- f | | | --14.33%-- d | | | --6.81%-- f | --49.86%-- b | --42.72%-- c | |--14.43%-- d | | | --7.30%-- f | --14.13%-- e | --7.05%-- f If the user filters by "d", we should: a) Construct a callgraph that has only callchains containing "d" b) Express percentages as a fraction of the unfiltered total_period c) Allow users to expand callchains and see both the callers and callees of d. Right now, I can do this in two steps: # Get the callgraph leading to "d" perf report -S d -G -g graph,0.5,callee 100.00% foo foo | --- __libc_start_main main | |--51.37%-- a | c | d | --48.63%-- b c d # Get the callgraph below "d" (i.e. where d is the caller) perf report d -G -g graph,0.5,caller -s inclusive 95.56% [.] d | --- d | --46.88%-- f and the percentages are not expressed as a percentages of the total_period. I'll send out a RFC patch next. -Arun ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-03-05 15:47 ` Ingo Molnar 2012-03-05 15:50 ` Ingo Molnar @ 2012-03-05 15:52 ` Stephane Eranian 2012-03-07 12:49 ` Stephane Eranian 2 siblings, 0 replies; 37+ messages in thread From: Stephane Eranian @ 2012-03-05 15:52 UTC (permalink / raw) To: Ingo Molnar Cc: linux-kernel, acme, peterz, dsahern, ravitillo, khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi, Peter Zijlstra On Mon, Mar 5, 2012 at 4:47 PM, Ingo Molnar <mingo@elte.hu> wrote: > > > Another usability bug of the new branch sampling feature is that > typing the obvious: > > $ perf record -b ./myprog > > does not do the obvious thing and default to 'any' but throws a > *very* unhelpful generic perf error: > Fair enough. I can add code to handle this case. > > usage: perf record [<options>] [<command>] > or: perf record [<options>] -- <command> [<options>] > > ... > > Really, we should do better than this. Only people who *want* to > specify finer LBR filters should be forced to specify them. > > Thanks, > > Ingo ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH] perf report: auto-detect branch stack sampling mode 2012-03-05 15:47 ` Ingo Molnar 2012-03-05 15:50 ` Ingo Molnar 2012-03-05 15:52 ` [PATCH] perf report: auto-detect branch stack sampling mode Stephane Eranian @ 2012-03-07 12:49 ` Stephane Eranian 2 siblings, 0 replies; 37+ messages in thread From: Stephane Eranian @ 2012-03-07 12:49 UTC (permalink / raw) To: Ingo Molnar Cc: linux-kernel, acme, peterz, dsahern, ravitillo, khandual, asharma, robert.richter, ming.m.lin, vweaver1, andi, Peter Zijlstra On Mon, Mar 5, 2012 at 4:47 PM, Ingo Molnar <mingo@elte.hu> wrote: > > Another usability bug of the new branch sampling feature is that > typing the obvious: > > $ perf record -b ./myprog > > does not do the obvious thing and default to 'any' but throws a > *very* unhelpful generic perf error: > Ok, so looked at this. Given the option parsing code, looks like we would have to split the option in two: -b and something else to disambiguate cmdline such as: $ perf record -b foo $ perf record -b any_call foo any_call: no such file or directory The code cannot disambiguate between any_call being a branch filter vs. the command to run, given that -b has now an optional argument. What we can do is split: -b and --branch-filter for instance. The former takes no argument and sets up branch-stack to ANY. The other one requires a parameter. I am almost done with TUI navigation of branch samples. > usage: perf record [<options>] [<command>] > or: perf record [<options>] -- <command> [<options>] > > ... > > Really, we should do better than this. Only people who *want* to > specify finer LBR filters should be forced to specify them. > > Thanks, > > Ingo ^ permalink raw reply [flat|nested] 37+ messages in thread
end of thread, other threads:[~2012-03-15 21:46 UTC | newest] Thread overview: 37+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2012-02-24 9:40 [PATCH] perf report: auto-detect branch stack sampling mode Stephane Eranian 2012-02-24 15:24 ` David Ahern 2012-02-24 15:28 ` Stephane Eranian 2012-02-24 15:31 ` David Ahern 2012-02-24 15:40 ` Stephane Eranian 2012-02-24 15:49 ` David Ahern 2012-02-24 15:51 ` Stephane Eranian 2012-03-02 17:47 ` Stephane Eranian 2012-03-02 19:08 ` Arnaldo Carvalho de Melo 2012-03-03 19:43 ` Arnaldo Carvalho de Melo 2012-03-05 10:49 ` Ingo Molnar 2012-03-05 11:11 ` Peter Zijlstra 2012-03-05 15:47 ` Ingo Molnar 2012-03-05 15:50 ` Ingo Molnar 2012-03-05 15:56 ` Ingo Molnar 2012-03-05 16:30 ` Peter Zijlstra 2012-03-05 16:32 ` Stephane Eranian 2012-03-05 17:20 ` Ingo Molnar 2012-03-05 20:35 ` Arnaldo Carvalho de Melo 2012-03-05 21:43 ` Arun Sharma 2012-03-05 22:26 ` Arnaldo Carvalho de Melo 2012-03-05 23:35 ` Arun Sharma 2012-03-06 3:06 ` Arnaldo Carvalho de Melo 2012-03-06 6:27 ` Ingo Molnar 2012-03-06 6:25 ` Ingo Molnar 2012-03-07 1:57 ` [RFC] perf report: Implement symbol filtering on TUI Namhyung Kim 2012-03-07 6:07 ` Ingo Molnar 2012-03-07 8:04 ` Namhyung Kim 2012-03-08 10:44 ` Ingo Molnar 2012-03-09 1:53 ` Namhyung Kim 2012-03-09 7:36 ` Ingo Molnar 2012-03-09 8:03 ` Namhyung Kim 2012-03-14 23:11 ` Arun Sharma 2012-03-15 0:44 ` Namhyung Kim 2012-03-15 21:46 ` Arun Sharma 2012-03-05 15:52 ` [PATCH] perf report: auto-detect branch stack sampling mode Stephane Eranian 2012-03-07 12:49 ` Stephane Eranian
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).