* [PATCH 2/5] perf script: extend db-export api to include callchains for samples
2016-04-19 8:56 [PATCH 1/5] perf tools: fix incorrect ordering of callchain entries Chris Phlipot
@ 2016-04-19 8:56 ` Chris Phlipot
2016-04-22 7:56 ` Adrian Hunter
2016-04-19 8:56 ` [PATCH 3/5] perf script: fix postgresql ubuntu install instructions Chris Phlipot
` (3 subsequent siblings)
4 siblings, 1 reply; 13+ messages in thread
From: Chris Phlipot @ 2016-04-19 8:56 UTC (permalink / raw)
To: acme, adrian.hunter, mingo, peterz; +Cc: linux-kernel, Chris Phlipot
The current implementation of the python database export API only
includes call path information when using some form of call/return
tracing, but is unable to do so when sampling.
The following API extensions allow exporting of data collected by
perf record when using --call-graph.
The additions to the python api include the following:
- add call_path_id to sample_table to allow association of samples
with call paths
- add dso_id to call_path_table to more closely align the data with that
of a callchain_node
db-export and trace-script-python were both extended to accomidate the API
changes listed above.
Thread-stack's functionality was expanded to allow storage and exporting
of callchains that result from individual samples.
- Introduced a new static function (thread_stack__process_callchain) to
resolve call paths using the existing callchain resolution provided by
thread__resolve_callchain
- The existing call_path tree in call_return_processor is used for storing
the data from the resolved callchain.
- Call_return_processor was also extended to allow the ability to export
full call paths in addtion to the existing individual call/return pairs,
since call/return pairs are not available when doing sampling.
The code was tested using call graphs from fp and dwarf.
export-to-postgresqlwas utilized with intel-pt data to verify that changes
did not negatively affect existing behavior of the db-export api.
Signed-off-by: Chris Phlipot <cphlipot0@gmail.com>
---
tools/perf/util/db-export.c | 21 ++-
tools/perf/util/db-export.h | 2 +
.../util/scripting-engines/trace-event-python.c | 20 ++-
tools/perf/util/thread-stack.c | 162 +++++++++++++++++----
tools/perf/util/thread-stack.h | 14 +-
5 files changed, 184 insertions(+), 35 deletions(-)
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index 049438d..69c9a9d 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -329,6 +329,13 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
if (err)
goto out_put;
+ dbe->call_path_last_seen_db_id = 0;
+ if(dbe->crp) {
+ thread_stack__process_callchain(thread, comm, evsel,
+ al->machine, sample,
+ PERF_MAX_STACK_DEPTH, dbe->crp);
+ }
+
if ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) &&
sample_addr_correlates_sym(&evsel->attr)) {
struct addr_location addr_al;
@@ -346,6 +353,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
goto out_put;
}
}
+ es.call_path_db_id = dbe->call_path_last_seen_db_id;
if (dbe->export_sample)
err = dbe->export_sample(dbe, &es);
@@ -397,9 +405,10 @@ int db_export__branch_types(struct db_export *dbe)
int db_export__call_path(struct db_export *dbe, struct call_path *cp)
{
int err;
-
- if (cp->db_id)
+ if (cp->db_id) {
+ dbe->call_path_last_seen_db_id = cp->db_id;
return 0;
+ }
if (cp->parent) {
err = db_export__call_path(dbe, cp->parent);
@@ -409,8 +418,14 @@ int db_export__call_path(struct db_export *dbe, struct call_path *cp)
cp->db_id = ++dbe->call_path_last_db_id;
- if (dbe->export_call_path)
+ if (dbe->export_call_path) {
+ if (cp->dso)
+ db_export__dso(dbe, cp->dso, cp->machine);
+ if (cp->sym && cp->dso)
+ db_export__symbol(dbe, cp->sym, cp->dso);
+ dbe->call_path_last_seen_db_id = cp->db_id;
return dbe->export_call_path(dbe, cp);
+ }
return 0;
}
diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h
index 25e22fd..40e3b07 100644
--- a/tools/perf/util/db-export.h
+++ b/tools/perf/util/db-export.h
@@ -43,6 +43,7 @@ struct export_sample {
u64 addr_dso_db_id;
u64 addr_sym_db_id;
u64 addr_offset; /* addr offset from symbol start */
+ u64 call_path_db_id;
};
struct db_export {
@@ -73,6 +74,7 @@ struct db_export {
u64 symbol_last_db_id;
u64 sample_last_db_id;
u64 call_path_last_db_id;
+ u64 call_path_last_seen_db_id; /* last db_id seen(exported or not) */
u64 call_return_last_db_id;
struct list_head deferred;
};
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 525eb49..ca3f9c6 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -681,7 +681,7 @@ static int python_export_sample(struct db_export *dbe,
struct tables *tables = container_of(dbe, struct tables, dbe);
PyObject *t;
- t = tuple_new(21);
+ t = tuple_new(22);
tuple_set_u64(t, 0, es->db_id);
tuple_set_u64(t, 1, es->evsel->db_id);
@@ -704,6 +704,8 @@ static int python_export_sample(struct db_export *dbe,
tuple_set_u64(t, 18, es->sample->data_src);
tuple_set_s32(t, 19, es->sample->flags & PERF_BRANCH_MASK);
tuple_set_s32(t, 20, !!(es->sample->flags & PERF_IP_FLAG_IN_TX));
+ tuple_set_s32(t, 21, es->call_path_db_id);
+
call_object(tables->sample_handler, t, "sample_table");
@@ -716,17 +718,19 @@ static int python_export_call_path(struct db_export *dbe, struct call_path *cp)
{
struct tables *tables = container_of(dbe, struct tables, dbe);
PyObject *t;
- u64 parent_db_id, sym_db_id;
+ u64 parent_db_id, sym_db_id, dso_db_id;
parent_db_id = cp->parent ? cp->parent->db_id : 0;
sym_db_id = cp->sym ? *(u64 *)symbol__priv(cp->sym) : 0;
+ dso_db_id = cp->dso ? cp->dso->db_id : 0;
- t = tuple_new(4);
+ t = tuple_new(5);
tuple_set_u64(t, 0, cp->db_id);
tuple_set_u64(t, 1, parent_db_id);
tuple_set_u64(t, 2, sym_db_id);
tuple_set_u64(t, 3, cp->ip);
+ tuple_set_u64(t, 4, dso_db_id);
call_object(tables->call_path_handler, t, "call_path_table");
@@ -763,6 +767,13 @@ static int python_export_call_return(struct db_export *dbe,
return 0;
}
+static int python_process_call_path(struct call_path *cp, void *data)
+{
+ struct db_export *dbe = data;
+
+ return db_export__call_path(dbe, cp);
+}
+
static int python_process_call_return(struct call_return *cr, void *data)
{
struct db_export *dbe = data;
@@ -1027,7 +1038,8 @@ static void set_table_handlers(struct tables *tables)
if (export_calls) {
tables->dbe.crp =
- call_return_processor__new(python_process_call_return,
+ call_return_processor__new(python_process_call_path,
+ python_process_call_return,
&tables->dbe);
if (!tables->dbe.crp)
Py_FatalError("failed to create calls processor");
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 679688e..38a749d 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -22,6 +22,7 @@
#include "debug.h"
#include "symbol.h"
#include "comm.h"
+#include "callchain.h"
#include "thread-stack.h"
#define CALL_PATH_BLOCK_SHIFT 8
@@ -56,7 +57,8 @@ struct call_path_root {
*/
struct call_return_processor {
struct call_path_root *cpr;
- int (*process)(struct call_return *cr, void *data);
+ int (*process_call_path)(struct call_path *cp, void *data);
+ int (*process_call_return)(struct call_return *cr, void *data);
void *data;
};
@@ -216,7 +218,7 @@ static int thread_stack__call_return(struct thread *thread,
if (no_return)
cr.flags |= CALL_RETURN_NO_RETURN;
- return crp->process(&cr, crp->data);
+ return crp->process_call_return(&cr, crp->data);
}
static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
@@ -336,9 +338,12 @@ void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
}
static void call_path__init(struct call_path *cp, struct call_path *parent,
+ struct machine *machine, struct dso *dso,
struct symbol *sym, u64 ip, bool in_kernel)
{
cp->parent = parent;
+ cp->machine = machine;
+ cp->dso = dso;
cp->sym = sym;
cp->ip = sym ? 0 : ip;
cp->db_id = 0;
@@ -354,7 +359,7 @@ static struct call_path_root *call_path_root__new(void)
cpr = zalloc(sizeof(struct call_path_root));
if (!cpr)
return NULL;
- call_path__init(&cpr->call_path, NULL, NULL, 0, false);
+ call_path__init(&cpr->call_path, NULL, NULL, NULL, NULL, 0, false);
INIT_LIST_HEAD(&cpr->blocks);
return cpr;
}
@@ -372,8 +377,9 @@ static void call_path_root__free(struct call_path_root *cpr)
static struct call_path *call_path__new(struct call_path_root *cpr,
struct call_path *parent,
- struct symbol *sym, u64 ip,
- bool in_kernel)
+ struct machine *machine,
+ struct dso *dso, struct symbol *sym,
+ u64 ip, bool in_kernel)
{
struct call_path_block *cpb;
struct call_path *cp;
@@ -393,14 +399,16 @@ static struct call_path *call_path__new(struct call_path_root *cpr,
n = cpr->next++ & CALL_PATH_BLOCK_MASK;
cp = &cpb->cp[n];
- call_path__init(cp, parent, sym, ip, in_kernel);
+ call_path__init(cp, parent, machine, dso, sym, ip, in_kernel);
return cp;
}
static struct call_path *call_path__findnew(struct call_path_root *cpr,
struct call_path *parent,
- struct symbol *sym, u64 ip, u64 ks)
+ struct machine *machine,
+ struct dso *dso, struct symbol *sym,
+ u64 ip, u64 ks)
{
struct rb_node **p;
struct rb_node *node_parent = NULL;
@@ -411,23 +419,28 @@ static struct call_path *call_path__findnew(struct call_path_root *cpr,
ip = 0;
if (!parent)
- return call_path__new(cpr, parent, sym, ip, in_kernel);
+ return call_path__new(cpr, parent, machine, dso, sym, ip,
+ in_kernel);
p = &parent->children.rb_node;
while (*p != NULL) {
node_parent = *p;
cp = rb_entry(node_parent, struct call_path, rb_node);
- if (cp->sym == sym && cp->ip == ip)
+ if (cp->sym == sym && cp->ip == ip && cp->dso == dso)
return cp;
- if (sym < cp->sym || (sym == cp->sym && ip < cp->ip))
+ if (sym < cp->sym || (sym == cp->sym && ip < cp->ip) ||
+ (sym == cp->sym && ip == cp->ip
+ && dso < cp->dso) ||
+ (sym == cp->sym && ip == cp->ip
+ && dso == cp->dso && machine < cp->machine))
p = &(*p)->rb_left;
else
p = &(*p)->rb_right;
}
- cp = call_path__new(cpr, parent, sym, ip, in_kernel);
+ cp = call_path__new(cpr, parent, machine, dso, sym, ip, in_kernel);
if (!cp)
return NULL;
@@ -438,7 +451,10 @@ static struct call_path *call_path__findnew(struct call_path_root *cpr,
}
struct call_return_processor *
-call_return_processor__new(int (*process)(struct call_return *cr, void *data),
+call_return_processor__new(int (*process_call_path)(struct call_path *cp,
+ void *data),
+ int (*process_call_return)(struct call_return *cr,
+ void *data),
void *data)
{
struct call_return_processor *crp;
@@ -449,7 +465,8 @@ call_return_processor__new(int (*process)(struct call_return *cr, void *data),
crp->cpr = call_path_root__new();
if (!crp->cpr)
goto out_free;
- crp->process = process;
+ crp->process_call_path = process_call_path;
+ crp->process_call_return = process_call_return;
crp->data = data;
return crp;
@@ -492,7 +509,7 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
u64 ret_addr, u64 timestamp, u64 ref,
- struct symbol *sym)
+ struct dso *dso, struct symbol *sym)
{
int err;
@@ -502,7 +519,7 @@ static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
if (ts->cnt == 1) {
struct thread_stack_entry *tse = &ts->stack[0];
- if (tse->cp->sym == sym)
+ if (tse->cp->dso == dso && tse->cp->sym == sym)
return thread_stack__call_return(thread, ts, --ts->cnt,
timestamp, ref, false);
}
@@ -540,20 +557,28 @@ static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
{
struct call_path_root *cpr = ts->crp->cpr;
struct call_path *cp;
+ struct machine *machine;
+ struct dso *dso = NULL;
struct symbol *sym;
u64 ip;
if (sample->ip) {
ip = sample->ip;
sym = from_al->sym;
+ if (from_al->map)
+ dso = from_al->map->dso;
+ machine = from_al->machine;
} else if (sample->addr) {
ip = sample->addr;
sym = to_al->sym;
+ if (to_al->map)
+ dso = to_al->map->dso;
+ machine = to_al->machine;
} else {
return 0;
}
- cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
+ cp = call_path__findnew(cpr, &cpr->call_path, machine, dso, sym, ip,
ts->kernel_start);
if (!cp)
return -ENOMEM;
@@ -586,6 +611,7 @@ static int thread_stack__no_call_return(struct thread *thread,
/* If the stack is empty, push the userspace address */
if (!ts->cnt) {
cp = call_path__findnew(cpr, &cpr->call_path,
+ to_al->machine, to_al->map->dso,
to_al->sym, sample->addr,
ts->kernel_start);
if (!cp)
@@ -610,7 +636,8 @@ static int thread_stack__no_call_return(struct thread *thread,
parent = &cpr->call_path;
/* This 'return' had no 'call', so push and pop top of stack */
- cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip,
+ cp = call_path__findnew(cpr, parent, from_al->machine,
+ from_al->map->dso, from_al->sym, sample->ip,
ts->kernel_start);
if (!cp)
return -ENOMEM;
@@ -621,7 +648,7 @@ static int thread_stack__no_call_return(struct thread *thread,
return err;
return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref,
- to_al->sym);
+ to_al->map->dso, to_al->sym);
}
static int thread_stack__trace_begin(struct thread *thread,
@@ -636,7 +663,7 @@ static int thread_stack__trace_begin(struct thread *thread,
/* Pop trace end */
tse = &ts->stack[ts->cnt - 1];
- if (tse->cp->sym == NULL && tse->cp->ip == 0) {
+ if (tse->cp->dso == NULL && tse->cp->sym == NULL && tse->cp->ip == 0) {
err = thread_stack__call_return(thread, ts, --ts->cnt,
timestamp, ref, false);
if (err)
@@ -657,7 +684,7 @@ static int thread_stack__trace_end(struct thread_stack *ts,
if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
return 0;
- cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
+ cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, NULL, NULL, 0,
ts->kernel_start);
if (!cp)
return -ENOMEM;
@@ -668,14 +695,11 @@ static int thread_stack__trace_end(struct thread_stack *ts,
false);
}
-int thread_stack__process(struct thread *thread, struct comm *comm,
- struct perf_sample *sample,
- struct addr_location *from_al,
- struct addr_location *to_al, u64 ref,
- struct call_return_processor *crp)
+static int __thread_stack__process_init(struct thread *thread,
+ struct comm *comm,
+ struct call_return_processor *crp)
{
struct thread_stack *ts = thread->ts;
- int err = 0;
if (ts) {
if (!ts->crp) {
@@ -694,6 +718,80 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
ts = thread->ts;
ts->comm = comm;
}
+ return 0;
+}
+
+int thread_stack__process_callchain(struct thread *thread, struct comm *comm,
+ struct perf_evsel *evsel,
+ struct machine *machine,
+ struct perf_sample *sample, int max_stack,
+ struct call_return_processor *crp)
+{
+ struct call_path *current = &crp->cpr->call_path;
+ struct thread_stack *ts = NULL;
+ enum chain_order saved_order = callchain_param.order;
+ int err = 0;
+
+ if (!symbol_conf.use_callchain || !sample->callchain)
+ return err;
+
+ err = __thread_stack__process_init(thread, comm, crp);
+ if(err)
+ return err;
+
+ ts = thread->ts;
+
+
+ callchain_param.order = ORDER_CALLER;
+ err = thread__resolve_callchain(thread, &callchain_cursor, evsel,
+ sample, NULL, NULL, max_stack);
+ if (err) {
+ callchain_param.order = saved_order;
+ return err;
+ }
+ callchain_cursor_commit(&callchain_cursor);
+
+ while (1) {
+ struct callchain_cursor_node *node;
+ struct dso *dso = NULL;
+ node = callchain_cursor_current(&callchain_cursor);
+ if (!node)
+ break;
+ if (node->map)
+ dso = node->map->dso;
+
+ current = call_path__findnew(ts->crp->cpr, current, machine,
+ dso, node->sym, node->ip,
+ ts->kernel_start);
+
+ callchain_cursor_advance(&callchain_cursor);
+ }
+ callchain_param.order = saved_order;
+
+ if (current == &crp->cpr->call_path) {
+ /* Bail because the callchain was empty. */
+ return 1;
+ }
+
+ err = ts->crp->process_call_path(current,ts->crp->data);
+ return err;
+}
+
+int thread_stack__process(struct thread *thread, struct comm *comm,
+ struct perf_sample *sample,
+ struct addr_location *from_al,
+ struct addr_location *to_al, u64 ref,
+ struct call_return_processor *crp)
+{
+ struct thread_stack *ts = NULL;
+
+ int err = 0;
+
+ err = __thread_stack__process_init(thread, comm, crp);
+ if(err)
+ return err;
+
+ ts = thread->ts;
/* Flush stack on exec */
if (ts->comm != comm && thread->pid_ == thread->tid) {
@@ -717,8 +815,12 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
if (sample->flags & PERF_IP_FLAG_CALL) {
struct call_path_root *cpr = ts->crp->cpr;
struct call_path *cp;
+ struct dso *dso = NULL;
u64 ret_addr;
+ if(to_al->map)
+ dso = to_al->map->dso;
+
if (!sample->ip || !sample->addr)
return 0;
@@ -727,6 +829,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
return 0; /* Zero-length calls are excluded */
cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
+ to_al->machine, dso,
to_al->sym, sample->addr,
ts->kernel_start);
if (!cp)
@@ -734,11 +837,16 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
cp, false);
} else if (sample->flags & PERF_IP_FLAG_RETURN) {
+ struct dso *dso = NULL;
+ if(from_al->map)
+ dso = from_al->map->dso;
+
if (!sample->ip || !sample->addr)
return 0;
err = thread_stack__pop_cp(thread, ts, sample->addr,
- sample->time, ref, from_al->sym);
+ sample->time, ref, dso,
+ from_al->sym);
if (err) {
if (err < 0)
return err;
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index e1528f1..7b9615e 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -26,6 +26,7 @@ struct comm;
struct ip_callchain;
struct symbol;
struct dso;
+struct machine;
struct call_return_processor;
struct comm;
struct perf_sample;
@@ -83,6 +84,8 @@ struct call_return {
*/
struct call_path {
struct call_path *parent;
+ struct machine *machine;
+ struct dso *dso;
struct symbol *sym;
u64 ip;
u64 db_id;
@@ -100,9 +103,18 @@ int thread_stack__flush(struct thread *thread);
void thread_stack__free(struct thread *thread);
struct call_return_processor *
-call_return_processor__new(int (*process)(struct call_return *cr, void *data),
+call_return_processor__new(int (*process_call_path)(struct call_path *cp,
+ void *data),
+ int (*process_call_return)(struct call_return *cr,
+ void *data),
void *data);
void call_return_processor__free(struct call_return_processor *crp);
+
+int thread_stack__process_callchain(struct thread *thread, struct comm *comm,
+ struct perf_evsel *evsel,
+ struct machine *machine,
+ struct perf_sample *sample, int max_stack,
+ struct call_return_processor *crp);
int thread_stack__process(struct thread *thread, struct comm *comm,
struct perf_sample *sample,
struct addr_location *from_al,
--
2.7.4
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [PATCH 2/5] perf script: extend db-export api to include callchains for samples
2016-04-19 8:56 ` [PATCH 2/5] perf script: extend db-export api to include callchains for samples Chris Phlipot
@ 2016-04-22 7:56 ` Adrian Hunter
2016-04-23 4:41 ` Chris Phlipot
0 siblings, 1 reply; 13+ messages in thread
From: Adrian Hunter @ 2016-04-22 7:56 UTC (permalink / raw)
To: Chris Phlipot, acme, mingo, peterz; +Cc: linux-kernel
On 19/04/16 11:56, Chris Phlipot wrote:
> The current implementation of the python database export API only
> includes call path information when using some form of call/return
> tracing, but is unable to do so when sampling.
>
> The following API extensions allow exporting of data collected by
> perf record when using --call-graph.
>
> The additions to the python api include the following:
> - add call_path_id to sample_table to allow association of samples
> with call paths
>
> - add dso_id to call_path_table to more closely align the data with that
> of a callchain_node
The call_paths table already has symbol_id which belongs uniquely to a DSO,
so why do we need dso_id as well?
>
> db-export and trace-script-python were both extended to accomidate the API
accomidate -> accommodate
> changes listed above.
>
> Thread-stack's functionality was expanded to allow storage and exporting
> of callchains that result from individual samples.
>
> - Introduced a new static function (thread_stack__process_callchain) to
> resolve call paths using the existing callchain resolution provided by
> thread__resolve_callchain
>
> - The existing call_path tree in call_return_processor is used for storing
> the data from the resolved callchain.
>
> - Call_return_processor was also extended to allow the ability to export
> full call paths in addtion to the existing individual call/return pairs,
> since call/return pairs are not available when doing sampling.
Why do you need a callback? Seems like the only thing you need from
thread-stack.c is the call path tree. You could move that to its own .c/.h
files and then process the call chain in db-export.c
Also a list of changes like the one above heavily implies you are not
obeying the one patch == one change rule. Please try to make patches that
only do one thing and also run checkpatch.
If you don't mind, I'll let you respond to my comments before I comment on
any other patches.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/5] perf script: extend db-export api to include callchains for samples
2016-04-22 7:56 ` Adrian Hunter
@ 2016-04-23 4:41 ` Chris Phlipot
2016-04-28 8:36 ` Chris Phlipot
0 siblings, 1 reply; 13+ messages in thread
From: Chris Phlipot @ 2016-04-23 4:41 UTC (permalink / raw)
To: Adrian Hunter, acme, mingo, peterz; +Cc: linux-kernel
On 04/22/2016 12:56 AM, Adrian Hunter wrote:
> The call_paths table already has symbol_id which belongs uniquely to a DSO,
> so why do we need dso_id as well?
If the symbol_id is 0 because the IP could not be resolved to a symbol,
this is not necessarily a valid assumption. Without a dso_id in the
call_paths table, it is not possible to resolve the dso when symbol
information is missing. the db_export api currently does not have enough
information to match a DSO with an IP.
It is often useful to still have the call path associated with a DSO
even if there is no symbol, which i why i recommend keeping the dso_id
in the call_paths table.
> Why do you need a callback? Seems like the only thing you need from
> thread-stack.c is the call path tree. You could move that to its own .c/.h
> files and then process the call chain in db-export.c
My original intent was to reuse existing code with minimal changes and
conform the existing design patterns they used. Thread-stack.c, for
example, currently uses a callback to populate the call_return table, so
I used a callback as well to populate the call_path table.
I am open to making this change if it is believed it will result in a
cleaner implementation.
>
> Also a list of changes like the one above heavily implies you are not
> obeying the one patch == one change rule. Please try to make patches that
> only do one thing and also run checkpatch.
While i can split this into a few smaller patches there is only really
justification for applying all of them all together. If this is still
preferred i can resubmit this in smaller parts.
>
> If you don't mind, I'll let you respond to my comments before I comment on
> any other patches.
>
Let me know if you have any additional comments.
Thanks,
Chris
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/5] perf script: extend db-export api to include callchains for samples
2016-04-23 4:41 ` Chris Phlipot
@ 2016-04-28 8:36 ` Chris Phlipot
0 siblings, 0 replies; 13+ messages in thread
From: Chris Phlipot @ 2016-04-28 8:36 UTC (permalink / raw)
To: Adrian Hunter, acme, mingo, peterz; +Cc: linux-kernel
Hi Adrian,
I have just resubmitted these changes as a new patch set, which I believe
should address most of your concerns. Please review the new patch set
instead of continuing with this one.
https://lkml.org/lkml/2016/4/28/75
Thanks,
Chris
On 04/22/2016 09:41 PM, Chris Phlipot wrote:
>
>
> On 04/22/2016 12:56 AM, Adrian Hunter wrote:
>> The call_paths table already has symbol_id which belongs uniquely to
>> a DSO,
>> so why do we need dso_id as well?
> If the symbol_id is 0 because the IP could not be resolved to a
> symbol, this is not necessarily a valid assumption. Without a dso_id
> in the call_paths table, it is not possible to resolve the dso when
> symbol information is missing. the db_export api currently does not
> have enough information to match a DSO with an IP.
>
> It is often useful to still have the call path associated with a DSO
> even if there is no symbol, which i why i recommend keeping the dso_id
> in the call_paths table.
>> Why do you need a callback? Seems like the only thing you need from
>> thread-stack.c is the call path tree. You could move that to its own
>> .c/.h
>> files and then process the call chain in db-export.c
> My original intent was to reuse existing code with minimal changes and
> conform the existing design patterns they used. Thread-stack.c, for
> example, currently uses a callback to populate the call_return table,
> so I used a callback as well to populate the call_path table.
>
> I am open to making this change if it is believed it will result in a
> cleaner implementation.
>>
>> Also a list of changes like the one above heavily implies you are not
>> obeying the one patch == one change rule. Please try to make patches
>> that
>> only do one thing and also run checkpatch.
> While i can split this into a few smaller patches there is only really
> justification for applying all of them all together. If this is still
> preferred i can resubmit this in smaller parts.
>>
>> If you don't mind, I'll let you respond to my comments before I
>> comment on
>> any other patches.
>>
> Let me know if you have any additional comments.
>
> Thanks,
> Chris
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 3/5] perf script: fix postgresql ubuntu install instructions
2016-04-19 8:56 [PATCH 1/5] perf tools: fix incorrect ordering of callchain entries Chris Phlipot
2016-04-19 8:56 ` [PATCH 2/5] perf script: extend db-export api to include callchains for samples Chris Phlipot
@ 2016-04-19 8:56 ` Chris Phlipot
2016-04-19 13:41 ` Arnaldo Carvalho de Melo
2016-04-23 13:03 ` [tip:perf/core] perf script: Fix " tip-bot for Chris Phlipot
2016-04-19 8:56 ` [PATCH 4/5] perf script: add option to control callchain export from python Chris Phlipot
` (2 subsequent siblings)
4 siblings, 2 replies; 13+ messages in thread
From: Chris Phlipot @ 2016-04-19 8:56 UTC (permalink / raw)
To: acme, adrian.hunter, mingo, peterz; +Cc: linux-kernel, Chris Phlipot
The current instructions for setting up an Ubuntu system for using
the export-to-postgresql.py script are incorrect.
The instructions in the script have been updated to work on newer
versions of ubuntu.
-Add missing dependencies to apt-get command:
python-pyside.qtsql, libqt4-sql-psql
-Add '-s' option to createuser command to force the user to be a
superuser since the command doesn't prompt as indicated in the
current instructions.
Tested on: Ubuntu 14.04, Ubuntu 16.04(beta)
Signed-off-by: Chris Phlipot <cphlipot0@gmail.com>
---
tools/perf/scripts/python/export-to-postgresql.py | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
index 1b02cdc..6f0ca68 100644
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -34,10 +34,9 @@ import datetime
#
# ubuntu:
#
-# $ sudo apt-get install postgresql
+# $ sudo apt-get install postgresql python-pyside.qtsql libqt4-sql-psql
# $ sudo su - postgres
-# $ createuser <your user id here>
-# Shall the new role be a superuser? (y/n) y
+# $ createuser -s <your user id here>
#
# An example of using this script with Intel PT:
#
--
2.7.4
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [PATCH 3/5] perf script: fix postgresql ubuntu install instructions
2016-04-19 8:56 ` [PATCH 3/5] perf script: fix postgresql ubuntu install instructions Chris Phlipot
@ 2016-04-19 13:41 ` Arnaldo Carvalho de Melo
2016-04-23 13:03 ` [tip:perf/core] perf script: Fix " tip-bot for Chris Phlipot
1 sibling, 0 replies; 13+ messages in thread
From: Arnaldo Carvalho de Melo @ 2016-04-19 13:41 UTC (permalink / raw)
To: Chris Phlipot; +Cc: adrian.hunter, mingo, peterz, linux-kernel
Em Tue, Apr 19, 2016 at 01:56:02AM -0700, Chris Phlipot escreveu:
> The current instructions for setting up an Ubuntu system for using
> the export-to-postgresql.py script are incorrect.
>
> The instructions in the script have been updated to work on newer
> versions of ubuntu.
>
> -Add missing dependencies to apt-get command:
> python-pyside.qtsql, libqt4-sql-psql
> -Add '-s' option to createuser command to force the user to be a
> superuser since the command doesn't prompt as indicated in the
> current instructions.
>
> Tested on: Ubuntu 14.04, Ubuntu 16.04(beta)
Applied.
> Signed-off-by: Chris Phlipot <cphlipot0@gmail.com>
> ---
> tools/perf/scripts/python/export-to-postgresql.py | 5 ++---
> 1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
> index 1b02cdc..6f0ca68 100644
> --- a/tools/perf/scripts/python/export-to-postgresql.py
> +++ b/tools/perf/scripts/python/export-to-postgresql.py
> @@ -34,10 +34,9 @@ import datetime
> #
> # ubuntu:
> #
> -# $ sudo apt-get install postgresql
> +# $ sudo apt-get install postgresql python-pyside.qtsql libqt4-sql-psql
> # $ sudo su - postgres
> -# $ createuser <your user id here>
> -# Shall the new role be a superuser? (y/n) y
> +# $ createuser -s <your user id here>
> #
> # An example of using this script with Intel PT:
> #
> --
> 2.7.4
^ permalink raw reply [flat|nested] 13+ messages in thread
* [tip:perf/core] perf script: Fix postgresql ubuntu install instructions
2016-04-19 8:56 ` [PATCH 3/5] perf script: fix postgresql ubuntu install instructions Chris Phlipot
2016-04-19 13:41 ` Arnaldo Carvalho de Melo
@ 2016-04-23 13:03 ` tip-bot for Chris Phlipot
1 sibling, 0 replies; 13+ messages in thread
From: tip-bot for Chris Phlipot @ 2016-04-23 13:03 UTC (permalink / raw)
To: linux-tip-commits
Cc: acme, cphlipot0, tglx, linux-kernel, mingo, adrian.hunter, hpa,
peterz
Commit-ID: d6632dd59b66c89724ef28e2723586d1429382aa
Gitweb: http://git.kernel.org/tip/d6632dd59b66c89724ef28e2723586d1429382aa
Author: Chris Phlipot <cphlipot0@gmail.com>
AuthorDate: Tue, 19 Apr 2016 01:56:02 -0700
Committer: Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Tue, 19 Apr 2016 12:36:54 -0300
perf script: Fix postgresql ubuntu install instructions
The current instructions for setting up an Ubuntu system for using the
export-to-postgresql.py script are incorrect.
The instructions in the script have been updated to work on newer
versions of ubuntu.
-Add missing dependencies to apt-get command:
python-pyside.qtsql, libqt4-sql-psql
-Add '-s' option to createuser command to force the user to be a
superuser since the command doesn't prompt as indicated in the
current instructions.
Tested on: Ubuntu 14.04, Ubuntu 16.04(beta)
Signed-off-by: Chris Phlipot <cphlipot0@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1461056164-14914-3-git-send-email-cphlipot0@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/perf/scripts/python/export-to-postgresql.py | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
index 1b02cdc..6f0ca68 100644
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -34,10 +34,9 @@ import datetime
#
# ubuntu:
#
-# $ sudo apt-get install postgresql
+# $ sudo apt-get install postgresql python-pyside.qtsql libqt4-sql-psql
# $ sudo su - postgres
-# $ createuser <your user id here>
-# Shall the new role be a superuser? (y/n) y
+# $ createuser -s <your user id here>
#
# An example of using this script with Intel PT:
#
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 4/5] perf script: add option to control callchain export from python
2016-04-19 8:56 [PATCH 1/5] perf tools: fix incorrect ordering of callchain entries Chris Phlipot
2016-04-19 8:56 ` [PATCH 2/5] perf script: extend db-export api to include callchains for samples Chris Phlipot
2016-04-19 8:56 ` [PATCH 3/5] perf script: fix postgresql ubuntu install instructions Chris Phlipot
@ 2016-04-19 8:56 ` Chris Phlipot
2016-04-19 8:56 ` [PATCH 5/5] perf script: Update export-to-postgresql to support callchains Chris Phlipot
2016-04-22 7:55 ` [PATCH 1/5] perf tools: fix incorrect ordering of callchain entries Adrian Hunter
4 siblings, 0 replies; 13+ messages in thread
From: Chris Phlipot @ 2016-04-19 8:56 UTC (permalink / raw)
To: acme, adrian.hunter, mingo, peterz; +Cc: linux-kernel, Chris Phlipot
resolving the callchain for every sample can be very expensive.
This commit introduces a new option "perf_db_export_callchains"
which will be enabled when it is set to True from within the python
script.
Signed-off-by: Chris Phlipot <cphlipot0@gmail.com>
---
tools/perf/util/db-export.c | 2 +-
tools/perf/util/db-export.h | 1 +
.../util/scripting-engines/trace-event-python.c | 22 +++++++++++++++++++++-
3 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index 69c9a9d..3bd8247 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -330,7 +330,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
goto out_put;
dbe->call_path_last_seen_db_id = 0;
- if(dbe->crp) {
+ if(dbe->crp && dbe->export_callchains) {
thread_stack__process_callchain(thread, comm, evsel,
al->machine, sample,
PERF_MAX_STACK_DEPTH, dbe->crp);
diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h
index 40e3b07..b75fea9 100644
--- a/tools/perf/util/db-export.h
+++ b/tools/perf/util/db-export.h
@@ -77,6 +77,7 @@ struct db_export {
u64 call_path_last_seen_db_id; /* last db_id seen(exported or not) */
u64 call_return_last_db_id;
struct list_head deferred;
+ bool export_callchains;
};
int db_export__init(struct db_export *dbe);
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index ca3f9c6..2e67b35 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -1008,8 +1008,10 @@ error:
static void set_table_handlers(struct tables *tables)
{
const char *perf_db_export_mode = "perf_db_export_mode";
+ const char *perf_db_export_callchains = "perf_db_export_callchains";
const char *perf_db_export_calls = "perf_db_export_calls";
- PyObject *db_export_mode, *db_export_calls;
+ PyObject *db_export_mode, *db_export_callchains, *db_export_calls;
+ bool export_callchains = false;
bool export_calls = false;
int ret;
@@ -1028,6 +1030,16 @@ static void set_table_handlers(struct tables *tables)
return;
tables->dbe.crp = NULL;
+
+ db_export_callchains = PyDict_GetItemString(main_dict,
+ perf_db_export_callchains);
+ if (db_export_callchains) {
+ ret = PyObject_IsTrue(db_export_callchains);
+ if (ret == -1)
+ handler_call_die(perf_db_export_callchains);
+ export_callchains = !!ret;
+ }
+
db_export_calls = PyDict_GetItemString(main_dict, perf_db_export_calls);
if (db_export_calls) {
ret = PyObject_IsTrue(db_export_calls);
@@ -1043,9 +1055,17 @@ static void set_table_handlers(struct tables *tables)
&tables->dbe);
if (!tables->dbe.crp)
Py_FatalError("failed to create calls processor");
+ } else if (export_callchains) {
+ tables->dbe.crp =
+ call_return_processor__new(python_process_call_path,
+ NULL,
+ &tables->dbe);
+ if (!tables->dbe.crp)
+ Py_FatalError("failed to create calls processor");
}
tables->db_export_mode = true;
+ tables->dbe.export_callchains = export_callchains;
/*
* Reserve per symbol space for symbol->db_id via symbol__priv()
*/
--
2.7.4
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH 5/5] perf script: Update export-to-postgresql to support callchains
2016-04-19 8:56 [PATCH 1/5] perf tools: fix incorrect ordering of callchain entries Chris Phlipot
` (2 preceding siblings ...)
2016-04-19 8:56 ` [PATCH 4/5] perf script: add option to control callchain export from python Chris Phlipot
@ 2016-04-19 8:56 ` Chris Phlipot
2016-04-22 7:55 ` [PATCH 1/5] perf tools: fix incorrect ordering of callchain entries Adrian Hunter
4 siblings, 0 replies; 13+ messages in thread
From: Chris Phlipot @ 2016-04-19 8:56 UTC (permalink / raw)
To: acme, adrian.hunter, mingo, peterz; +Cc: linux-kernel, Chris Phlipot
Update the export-to-postgresql.py to support the newly introduced
callchain export.
callchains are added into the existing call_paths table and can now
be associated with samples when the "callpaths" commandline option
is used with the script.
Includes the following changes:
-Now set the perf_db_export_callchains global
-Add "callchains" commandline option"
-Add perf_db_export_callchains checks for call_path table creation
and population.
-Add call_path_id to samples_table
-Add dso_id to call_paths_table
Signed-off-by: Chris Phlipot <cphlipot0@gmail.com>
---
tools/perf/scripts/python/export-to-postgresql.py | 60 ++++++++++++++---------
1 file changed, 37 insertions(+), 23 deletions(-)
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
index 6f0ca68..e64009f 100644
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -223,11 +223,14 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
perf_db_export_mode = True
perf_db_export_calls = False
+perf_db_export_callchains = False
+
def usage():
- print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>]"
+ print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>]"
print >> sys.stderr, "where: columns 'all' or 'branches'"
- print >> sys.stderr, " calls 'calls' => create calls table"
+ print >> sys.stderr, " calls 'calls' => create calls and call_paths table"
+ print >> sys.stderr, " callchains 'callchains' => create call_paths table"
raise Exception("Too few arguments")
if (len(sys.argv) < 2):
@@ -245,12 +248,14 @@ if columns not in ("all", "branches"):
branches = (columns == "branches")
-if (len(sys.argv) >= 4):
- if (sys.argv[3] == "calls"):
+for i in range(3,len(sys.argv)):
+ if (sys.argv[i] == "calls"):
perf_db_export_calls = True
+ elif (sys.argv[i] == "callchains"):
+ perf_db_export_callchains = True
else:
usage()
-
+
output_dir_name = os.getcwd() + "/" + dbname + "-perf-data"
os.mkdir(output_dir_name)
@@ -358,14 +363,17 @@ else:
'transaction bigint,'
'data_src bigint,'
'branch_type integer,'
- 'in_tx boolean)')
+ 'in_tx boolean,'
+ 'call_path_id bigint)')
-if perf_db_export_calls:
+if perf_db_export_calls or perf_db_export_callchains:
do_query(query, 'CREATE TABLE call_paths ('
'id bigint NOT NULL,'
'parent_id bigint,'
'symbol_id bigint,'
- 'ip bigint)')
+ 'ip bigint,'
+ 'dso_id bigint)')
+if perf_db_export_calls:
do_query(query, 'CREATE TABLE calls ('
'id bigint NOT NULL,'
'thread_id bigint,'
@@ -427,7 +435,7 @@ do_query(query, 'CREATE VIEW comm_threads_view AS '
'(SELECT tid FROM threads WHERE id = thread_id) AS tid'
' FROM comm_threads')
-if perf_db_export_calls:
+if perf_db_export_calls or perf_db_export_callchains:
do_query(query, 'CREATE VIEW call_paths_view AS '
'SELECT '
'c.id,'
@@ -443,6 +451,7 @@ if perf_db_export_calls:
'(SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id,'
'(SELECT dso FROM symbols_view WHERE id = p.symbol_id) AS parent_dso_short_name'
' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id')
+if perf_db_export_calls:
do_query(query, 'CREATE VIEW calls_view AS '
'SELECT '
'calls.id,'
@@ -540,8 +549,9 @@ dso_file = open_output_file("dso_table.bin")
symbol_file = open_output_file("symbol_table.bin")
branch_type_file = open_output_file("branch_type_table.bin")
sample_file = open_output_file("sample_table.bin")
-if perf_db_export_calls:
+if perf_db_export_calls or perf_db_export_callchains:
call_path_file = open_output_file("call_path_table.bin")
+if perf_db_export_calls:
call_file = open_output_file("call_table.bin")
def trace_begin():
@@ -553,9 +563,9 @@ def trace_begin():
comm_table(0, "unknown")
dso_table(0, 0, "unknown", "unknown", "")
symbol_table(0, 0, 0, 0, 0, "unknown")
- sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
- if perf_db_export_calls:
- call_path_table(0, 0, 0, 0)
+ sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+ if perf_db_export_calls or perf_db_export_callchains:
+ call_path_table(0, 0, 0, 0, 0)
unhandled_count = 0
@@ -570,8 +580,9 @@ def trace_end():
copy_output_file(symbol_file, "symbols")
copy_output_file(branch_type_file, "branch_types")
copy_output_file(sample_file, "samples")
- if perf_db_export_calls:
+ if perf_db_export_calls or perf_db_export_callchains:
copy_output_file(call_path_file, "call_paths")
+ if perf_db_export_calls:
copy_output_file(call_file, "calls")
print datetime.datetime.today(), "Removing intermediate files..."
@@ -584,8 +595,9 @@ def trace_end():
remove_output_file(symbol_file)
remove_output_file(branch_type_file)
remove_output_file(sample_file)
- if perf_db_export_calls:
+ if perf_db_export_calls or perf_db_export_callchains:
remove_output_file(call_path_file)
+ if perf_db_export_calls:
remove_output_file(call_file)
os.rmdir(output_dir_name)
print datetime.datetime.today(), "Adding primary keys"
@@ -598,8 +610,9 @@ def trace_end():
do_query(query, 'ALTER TABLE symbols ADD PRIMARY KEY (id)')
do_query(query, 'ALTER TABLE branch_types ADD PRIMARY KEY (id)')
do_query(query, 'ALTER TABLE samples ADD PRIMARY KEY (id)')
- if perf_db_export_calls:
+ if perf_db_export_calls or perf_db_export_callchains:
do_query(query, 'ALTER TABLE call_paths ADD PRIMARY KEY (id)')
+ if perf_db_export_calls:
do_query(query, 'ALTER TABLE calls ADD PRIMARY KEY (id)')
print datetime.datetime.today(), "Adding foreign keys"
@@ -622,10 +635,11 @@ def trace_end():
'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id),'
'ADD CONSTRAINT todsofk FOREIGN KEY (to_dso_id) REFERENCES dsos (id),'
'ADD CONSTRAINT tosymbolfk FOREIGN KEY (to_symbol_id) REFERENCES symbols (id)')
- if perf_db_export_calls:
+ if perf_db_export_calls or perf_db_export_callchains:
do_query(query, 'ALTER TABLE call_paths '
'ADD CONSTRAINT parentfk FOREIGN KEY (parent_id) REFERENCES call_paths (id),'
'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id)')
+ if perf_db_export_calls:
do_query(query, 'ALTER TABLE calls '
'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id),'
'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),'
@@ -693,16 +707,16 @@ def branch_type_table(branch_type, name, *x):
value = struct.pack(fmt, 2, 4, branch_type, n, name)
branch_type_file.write(value)
-def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, *x):
+def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, *x):
if branches:
- value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiB", 17, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx)
+ value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiq", 18, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id)
else:
- value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiB", 21, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx)
+ value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiq", 22, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id)
sample_file.write(value)
-def call_path_table(cp_id, parent_id, symbol_id, ip, *x):
- fmt = "!hiqiqiqiq"
- value = struct.pack(fmt, 4, 8, cp_id, 8, parent_id, 8, symbol_id, 8, ip)
+def call_path_table(cp_id, parent_id, symbol_id, ip, dso_id, *x):
+ fmt = "!hiqiqiqiqiq"
+ value = struct.pack(fmt, 5, 8, cp_id, 8, parent_id, 8, symbol_id, 8, ip, 8, dso_id)
call_path_file.write(value)
def call_return_table(cr_id, thread_id, comm_id, call_path_id, call_time, return_time, branch_count, call_id, return_id, parent_call_path_id, flags, *x):
--
2.7.4
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [PATCH 1/5] perf tools: fix incorrect ordering of callchain entries
2016-04-19 8:56 [PATCH 1/5] perf tools: fix incorrect ordering of callchain entries Chris Phlipot
` (3 preceding siblings ...)
2016-04-19 8:56 ` [PATCH 5/5] perf script: Update export-to-postgresql to support callchains Chris Phlipot
@ 2016-04-22 7:55 ` Adrian Hunter
2016-04-22 7:59 ` Adrian Hunter
4 siblings, 1 reply; 13+ messages in thread
From: Adrian Hunter @ 2016-04-22 7:55 UTC (permalink / raw)
To: Chris Phlipot, acme, mingo, peterz; +Cc: linux-kernel
On 19/04/16 11:56, Chris Phlipot wrote:
> The existing implentation implementation of thread__resolve_callchain,
Remove 'implentation'
> under certain circumstanes, can assemble callchain entries in the
'circumstanes' -> 'circumstances'
> incorrect order.
>
> A the callchain entries are resolved incorrectly for a sample when all
> of the following conditions are met:
>
> 1. callchain_param.order is set to ORDER_CALLER
>
> 2. thread__resolve_callchain_sample is able to resolve callchain entries
> for the sample.
>
> 3. unwind__get_entries is also able to resolve callchain entries for the
> sample.
>
> The fix is accomplished by reversing the order in which
> thread__resolve_callchain_sample and unwind__get_entries are called
> when callchain_param.order is set to ORDER_CALLER.
Can you give an example of the commands you used and what the call chain
looked like before and after.
Also please run ./scripts/checkpatch.pl
>
> Unwind specific code from thread__resolve_callchain is also moved into a
> new static function to improve readability of the fix.
>
> Signed-off-by: Chris Phlipot <cphlipot0@gmail.com>
> ---
> tools/perf/util/machine.c | 57 ++++++++++++++++++++++++++++++++++-------------
> 1 file changed, 41 insertions(+), 16 deletions(-)
>
> diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
> index 0c4dabc..dd086c8 100644
> --- a/tools/perf/util/machine.c
> +++ b/tools/perf/util/machine.c
> @@ -1806,8 +1806,6 @@ static int thread__resolve_callchain_sample(struct thread *thread,
> int skip_idx = -1;
> int first_call = 0;
>
> - callchain_cursor_reset(cursor);
> -
> if (has_branch_callstack(evsel)) {
> err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
> root_al, max_stack);
> @@ -1918,20 +1916,12 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
> entry->map, entry->sym);
> }
>
> -int thread__resolve_callchain(struct thread *thread,
> - struct callchain_cursor *cursor,
> - struct perf_evsel *evsel,
> - struct perf_sample *sample,
> - struct symbol **parent,
> - struct addr_location *root_al,
> - int max_stack)
> -{
> - int ret = thread__resolve_callchain_sample(thread, cursor, evsel,
> - sample, parent,
> - root_al, max_stack);
> - if (ret)
> - return ret;
> -
> +static int thread__resolve_callchain_unwind(struct thread *thread,
> + struct callchain_cursor *cursor,
> + struct perf_evsel *evsel,
> + struct perf_sample *sample,
> + int max_stack)
> +{
> /* Can we do dwarf post unwind? */
> if (!((evsel->attr.sample_type & PERF_SAMPLE_REGS_USER) &&
> (evsel->attr.sample_type & PERF_SAMPLE_STACK_USER)))
> @@ -1944,7 +1934,42 @@ int thread__resolve_callchain(struct thread *thread,
>
> return unwind__get_entries(unwind_entry, cursor,
> thread, sample, max_stack);
> +}
> +
> +int thread__resolve_callchain(struct thread *thread,
> + struct callchain_cursor *cursor,
> + struct perf_evsel *evsel,
> + struct perf_sample *sample,
> + struct symbol **parent,
> + struct addr_location *root_al,
> + int max_stack)
> +{
> + int ret = 0;
> + callchain_cursor_reset(&callchain_cursor);
> +
> + if (callchain_param.order == ORDER_CALLEE) {
> + ret = thread__resolve_callchain_sample(thread, cursor,
> + evsel, sample,
> + parent, root_al,
> + max_stack);
> + if (ret)
> + return ret;
> + ret = thread__resolve_callchain_unwind(thread, cursor,
> + evsel, sample,
> + max_stack);
> + } else {
> + ret = thread__resolve_callchain_unwind(thread, cursor,
> + evsel, sample,
> + max_stack);
> + if (ret)
> + return ret;
> + ret = thread__resolve_callchain_sample(thread, cursor,
> + evsel, sample,
> + parent, root_al,
> + max_stack);
> + }
>
> + return ret;
> }
>
> int machine__for_each_thread(struct machine *machine,
>
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH 1/5] perf tools: fix incorrect ordering of callchain entries
2016-04-22 7:55 ` [PATCH 1/5] perf tools: fix incorrect ordering of callchain entries Adrian Hunter
@ 2016-04-22 7:59 ` Adrian Hunter
2016-04-23 3:35 ` Chris Phlipot
0 siblings, 1 reply; 13+ messages in thread
From: Adrian Hunter @ 2016-04-22 7:59 UTC (permalink / raw)
To: Adrian Hunter, Chris Phlipot, acme, mingo, peterz; +Cc: linux-kernel, Jiri Olsa
+Jiri since he wrote the original code
On 22/04/16 10:55, Adrian Hunter wrote:
> On 19/04/16 11:56, Chris Phlipot wrote:
>> The existing implentation implementation of thread__resolve_callchain,
>
> Remove 'implentation'
>
>> under certain circumstanes, can assemble callchain entries in the
>
> 'circumstanes' -> 'circumstances'
>
>> incorrect order.
>>
>> A the callchain entries are resolved incorrectly for a sample when all
>> of the following conditions are met:
>>
>> 1. callchain_param.order is set to ORDER_CALLER
>>
>> 2. thread__resolve_callchain_sample is able to resolve callchain entries
>> for the sample.
>>
>> 3. unwind__get_entries is also able to resolve callchain entries for the
>> sample.
>>
>> The fix is accomplished by reversing the order in which
>> thread__resolve_callchain_sample and unwind__get_entries are called
>> when callchain_param.order is set to ORDER_CALLER.
>
> Can you give an example of the commands you used and what the call chain
> looked like before and after.
>
> Also please run ./scripts/checkpatch.pl
>
>>
>> Unwind specific code from thread__resolve_callchain is also moved into a
>> new static function to improve readability of the fix.
>>
>> Signed-off-by: Chris Phlipot <cphlipot0@gmail.com>
>> ---
>> tools/perf/util/machine.c | 57 ++++++++++++++++++++++++++++++++++-------------
>> 1 file changed, 41 insertions(+), 16 deletions(-)
>>
>> diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
>> index 0c4dabc..dd086c8 100644
>> --- a/tools/perf/util/machine.c
>> +++ b/tools/perf/util/machine.c
>> @@ -1806,8 +1806,6 @@ static int thread__resolve_callchain_sample(struct thread *thread,
>> int skip_idx = -1;
>> int first_call = 0;
>>
>> - callchain_cursor_reset(cursor);
>> -
>> if (has_branch_callstack(evsel)) {
>> err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
>> root_al, max_stack);
>> @@ -1918,20 +1916,12 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
>> entry->map, entry->sym);
>> }
>>
>> -int thread__resolve_callchain(struct thread *thread,
>> - struct callchain_cursor *cursor,
>> - struct perf_evsel *evsel,
>> - struct perf_sample *sample,
>> - struct symbol **parent,
>> - struct addr_location *root_al,
>> - int max_stack)
>> -{
>> - int ret = thread__resolve_callchain_sample(thread, cursor, evsel,
>> - sample, parent,
>> - root_al, max_stack);
>> - if (ret)
>> - return ret;
>> -
>> +static int thread__resolve_callchain_unwind(struct thread *thread,
>> + struct callchain_cursor *cursor,
>> + struct perf_evsel *evsel,
>> + struct perf_sample *sample,
>> + int max_stack)
>> +{
>> /* Can we do dwarf post unwind? */
>> if (!((evsel->attr.sample_type & PERF_SAMPLE_REGS_USER) &&
>> (evsel->attr.sample_type & PERF_SAMPLE_STACK_USER)))
>> @@ -1944,7 +1934,42 @@ int thread__resolve_callchain(struct thread *thread,
>>
>> return unwind__get_entries(unwind_entry, cursor,
>> thread, sample, max_stack);
>> +}
>> +
>> +int thread__resolve_callchain(struct thread *thread,
>> + struct callchain_cursor *cursor,
>> + struct perf_evsel *evsel,
>> + struct perf_sample *sample,
>> + struct symbol **parent,
>> + struct addr_location *root_al,
>> + int max_stack)
>> +{
>> + int ret = 0;
>> + callchain_cursor_reset(&callchain_cursor);
>> +
>> + if (callchain_param.order == ORDER_CALLEE) {
>> + ret = thread__resolve_callchain_sample(thread, cursor,
>> + evsel, sample,
>> + parent, root_al,
>> + max_stack);
>> + if (ret)
>> + return ret;
>> + ret = thread__resolve_callchain_unwind(thread, cursor,
>> + evsel, sample,
>> + max_stack);
>> + } else {
>> + ret = thread__resolve_callchain_unwind(thread, cursor,
>> + evsel, sample,
>> + max_stack);
>> + if (ret)
>> + return ret;
>> + ret = thread__resolve_callchain_sample(thread, cursor,
>> + evsel, sample,
>> + parent, root_al,
>> + max_stack);
>> + }
>>
>> + return ret;
>> }
>>
>> int machine__for_each_thread(struct machine *machine,
>>
>
>
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH 1/5] perf tools: fix incorrect ordering of callchain entries
2016-04-22 7:59 ` Adrian Hunter
@ 2016-04-23 3:35 ` Chris Phlipot
0 siblings, 0 replies; 13+ messages in thread
From: Chris Phlipot @ 2016-04-23 3:35 UTC (permalink / raw)
To: Adrian Hunter, acme, mingo, peterz; +Cc: linux-kernel, Jiri Olsa
Modifying perf script to print call trees in the opposite order or
applying patch 2 from this series and comparing the results output from
export-to-postgtresql.py are the easiest ways to see the bug, however it
can still be seen in current builds using perf report.
Here is how i can reproduce the bug using perf report:
$ ./perf record --call-graph=dwarf stress -c 1 -t 5
when i run this command:
$./perf report --call-graph=flat,0,0,callee
i see this calltree contained in the output, which looks correct (callee
order):
gen8_irq_handler
handle_irq_event_percpu
handle_irq_event
handle_edge_irq
handle_irq
do_IRQ
ret_from_intr
__random
rand
0x558f2a04dded
0x558f2a04c774
__libc_start_main
0x558f2a04dcd9
Now I run this command:
$./perf report --call-graph=flat,0,0,caller
I would expect to see the exact reverse of the above when using caller
order(with "0x558f2a04dcd9" at the top and "gen8_irq_handler" at the
bottom) in the output, but it is nowhere to be found.
instead you see this:
ret_from_intr
do_IRQ
handle_irq
handle_edge_irq
handle_irq_event
handle_irq_event_percpu
gen8_irq_handler
0x558f2a04dcd9
__libc_start_main
0x558f2a04c774
0x558f2a04dded
rand
__random
Notice how internally the kernel symbols are reversed and the user space
symbols are reversed, but the kernel symbols still appear above the user
space symbols.
if you apply this patch and re-run, you will see the expected output
(with "0x558f2a04dcd9" at the top and "gen8_irq_handler" at the bottom):
0x558f2a04dcd9
__libc_start_main
0x558f2a04c774
0x558f2a04dded
rand
__random
ret_from_intr
do_IRQ
handle_irq
handle_edge_irq
handle_irq_event
handle_irq_event_percpu
gen8_irq_handler
Thanks,
Chris
On 04/22/2016 12:59 AM, Adrian Hunter wrote:
> +Jiri since he wrote the original code
>
> On 22/04/16 10:55, Adrian Hunter wrote:
>> On 19/04/16 11:56, Chris Phlipot wrote:
>>> The existing implentation implementation of thread__resolve_callchain,
>> Remove 'implentation'
>>
>>> under certain circumstanes, can assemble callchain entries in the
>> 'circumstanes' -> 'circumstances'
>>
>>> incorrect order.
>>>
>>> A the callchain entries are resolved incorrectly for a sample when all
>>> of the following conditions are met:
>>>
>>> 1. callchain_param.order is set to ORDER_CALLER
>>>
>>> 2. thread__resolve_callchain_sample is able to resolve callchain entries
>>> for the sample.
>>>
>>> 3. unwind__get_entries is also able to resolve callchain entries for the
>>> sample.
>>>
>>> The fix is accomplished by reversing the order in which
>>> thread__resolve_callchain_sample and unwind__get_entries are called
>>> when callchain_param.order is set to ORDER_CALLER.
>> Can you give an example of the commands you used and what the call chain
>> looked like before and after.
>>
>> Also please run ./scripts/checkpatch.pl
>>
>>> Unwind specific code from thread__resolve_callchain is also moved into a
>>> new static function to improve readability of the fix.
>>>
>>> Signed-off-by: Chris Phlipot <cphlipot0@gmail.com>
>>> ---
>>> tools/perf/util/machine.c | 57 ++++++++++++++++++++++++++++++++++-------------
>>> 1 file changed, 41 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
>>> index 0c4dabc..dd086c8 100644
>>> --- a/tools/perf/util/machine.c
>>> +++ b/tools/perf/util/machine.c
>>> @@ -1806,8 +1806,6 @@ static int thread__resolve_callchain_sample(struct thread *thread,
>>> int skip_idx = -1;
>>> int first_call = 0;
>>>
>>> - callchain_cursor_reset(cursor);
>>> -
>>> if (has_branch_callstack(evsel)) {
>>> err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
>>> root_al, max_stack);
>>> @@ -1918,20 +1916,12 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
>>> entry->map, entry->sym);
>>> }
>>>
>>> -int thread__resolve_callchain(struct thread *thread,
>>> - struct callchain_cursor *cursor,
>>> - struct perf_evsel *evsel,
>>> - struct perf_sample *sample,
>>> - struct symbol **parent,
>>> - struct addr_location *root_al,
>>> - int max_stack)
>>> -{
>>> - int ret = thread__resolve_callchain_sample(thread, cursor, evsel,
>>> - sample, parent,
>>> - root_al, max_stack);
>>> - if (ret)
>>> - return ret;
>>> -
>>> +static int thread__resolve_callchain_unwind(struct thread *thread,
>>> + struct callchain_cursor *cursor,
>>> + struct perf_evsel *evsel,
>>> + struct perf_sample *sample,
>>> + int max_stack)
>>> +{
>>> /* Can we do dwarf post unwind? */
>>> if (!((evsel->attr.sample_type & PERF_SAMPLE_REGS_USER) &&
>>> (evsel->attr.sample_type & PERF_SAMPLE_STACK_USER)))
>>> @@ -1944,7 +1934,42 @@ int thread__resolve_callchain(struct thread *thread,
>>>
>>> return unwind__get_entries(unwind_entry, cursor,
>>> thread, sample, max_stack);
>>> +}
>>> +
>>> +int thread__resolve_callchain(struct thread *thread,
>>> + struct callchain_cursor *cursor,
>>> + struct perf_evsel *evsel,
>>> + struct perf_sample *sample,
>>> + struct symbol **parent,
>>> + struct addr_location *root_al,
>>> + int max_stack)
>>> +{
>>> + int ret = 0;
>>> + callchain_cursor_reset(&callchain_cursor);
>>> +
>>> + if (callchain_param.order == ORDER_CALLEE) {
>>> + ret = thread__resolve_callchain_sample(thread, cursor,
>>> + evsel, sample,
>>> + parent, root_al,
>>> + max_stack);
>>> + if (ret)
>>> + return ret;
>>> + ret = thread__resolve_callchain_unwind(thread, cursor,
>>> + evsel, sample,
>>> + max_stack);
>>> + } else {
>>> + ret = thread__resolve_callchain_unwind(thread, cursor,
>>> + evsel, sample,
>>> + max_stack);
>>> + if (ret)
>>> + return ret;
>>> + ret = thread__resolve_callchain_sample(thread, cursor,
>>> + evsel, sample,
>>> + parent, root_al,
>>> + max_stack);
>>> + }
>>>
>>> + return ret;
>>> }
>>>
>>> int machine__for_each_thread(struct machine *machine,
>>>
>>
^ permalink raw reply [flat|nested] 13+ messages in thread