* [PATCH] perf/powerpc: Cache the DWARF debug info @ 2014-10-21 18:56 Sukadev Bhattiprolu 2014-10-21 19:08 ` Arnaldo Carvalho de Melo 0 siblings, 1 reply; 12+ messages in thread From: Sukadev Bhattiprolu @ 2014-10-21 18:56 UTC (permalink / raw) To: Arnaldo Carvalho de Melo, Jiri Olsa; +Cc: Anton Blanchard, linux-kernel >From 773a3608a0cd2daf02e244cb9ffbf5bb6a0e724e Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com> Date: Tue, 21 Oct 2014 13:20:22 -0500 Subject: [PATCH 1/1] perf/powerpc: Cache DWARF debug info Cache the DWARF debug info for DSO so we don't have to rebuild it for each address in the DSO (duh!). $ time /tmp/perf.orig report -g > /tmp/report.1 real 0m1.845s user 0m0.963s sys 0m0.879s $ time /tmp/perf report -g > /tmp/report.2 real 0m0.089s user 0m0.082s sys 0m0.006s $ diff /tmp/report.1 /tmp/report.2 $ Reported-by: Anton Blanchard <anton@samba.org> Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com> --- tools/perf/arch/powerpc/util/skip-callchain-idx.c | 91 ++++++++++++++++++++--- 1 file changed, 82 insertions(+), 9 deletions(-) diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index d73ef8b..bfe254d 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -38,6 +38,63 @@ static const Dwfl_Callbacks offline_callbacks = { .section_address = dwfl_offline_section_address, }; +struct list_head perf_dwfl_files; + +struct perf_dwfl_node { + struct list_head node; + char *exec_file; + Dwfl *dwfl; +}; + +static void perf_init_dwfl(void) +{ + static int dwfl_inited; + + if (!dwfl_inited) { + dwfl_inited = 1; + INIT_LIST_HEAD(&perf_dwfl_files); + } +} + +static Dwfl *perf_dwfl_find(const char *file_name) +{ + struct list_head *pos; + struct perf_dwfl_node *node; + + perf_init_dwfl(); + + list_for_each(pos, &perf_dwfl_files) { + node = list_entry(pos, struct perf_dwfl_node, node); + if (!strcmp(node->exec_file, file_name)) + return node->dwfl; + } + + return NULL; +} + + +/* + * Return 1 if were able to cache the DWARF debug info. 0 otherwise. + */ +static int perf_dwfl_cache(const char *file_name, Dwfl *dwfl) +{ + struct perf_dwfl_node *dwfl_node; + + dwfl_node = malloc(sizeof(struct perf_dwfl_node)); + + if (!dwfl_node) { + pr_debug("%s(): Unable to alloc memory\n", __func__); + return 0; + } + INIT_LIST_HEAD(&dwfl_node->node); + dwfl_node->dwfl = dwfl; + dwfl_node->exec_file = strdup(file_name); + + list_add(&dwfl_node->node, &perf_dwfl_files); + + return 1; +} + /* * Use the DWARF expression for the Call-frame-address and determine @@ -155,16 +212,26 @@ static int check_return_addr(const char *exec_file, Dwarf_Addr pc) Dwarf_Addr start = pc; Dwarf_Addr end = pc; bool signalp; + int cached; - dwfl = dwfl_begin(&offline_callbacks); + cached = 1; + dwfl = perf_dwfl_find(exec_file); if (!dwfl) { - pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1)); - return -1; - } - - if (dwfl_report_offline(dwfl, "", exec_file, -1) == NULL) { - pr_debug("dwfl_report_offline() failed %s\n", dwarf_errmsg(-1)); - goto out; + dwfl = dwfl_begin(&offline_callbacks); + if (!dwfl) { + pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1)); + return -1; + } + + if (dwfl_report_offline(dwfl, "", exec_file, -1) == NULL) { + pr_debug("dwfl_report_offline() failed %s\n", + dwarf_errmsg(-1)); + goto out; + } + /* + * If we fail to alloc memory, we lose the benefit of caching + */ + cached = perf_dwfl_cache(exec_file, dwfl); } mod = dwfl_addrmodule(dwfl, pc); @@ -194,7 +261,13 @@ static int check_return_addr(const char *exec_file, Dwarf_Addr pc) rc = check_return_reg(ra_regno, frame); out: - dwfl_end(dwfl); + /* + * In the unlikely event that we were unable to cache the debug + * info, release it so we don't leak fds. + */ + if (!cached) + dwfl_end(dwfl); + return rc; } -- 1.8.4.2 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH] perf/powerpc: Cache the DWARF debug info 2014-10-21 18:56 [PATCH] perf/powerpc: Cache the DWARF debug info Sukadev Bhattiprolu @ 2014-10-21 19:08 ` Arnaldo Carvalho de Melo 2014-10-22 0:09 ` Sukadev Bhattiprolu 0 siblings, 1 reply; 12+ messages in thread From: Arnaldo Carvalho de Melo @ 2014-10-21 19:08 UTC (permalink / raw) To: Sukadev Bhattiprolu; +Cc: Jiri Olsa, Anton Blanchard, linux-kernel Em Tue, Oct 21, 2014 at 11:56:10AM -0700, Sukadev Bhattiprolu escreveu: > >From 773a3608a0cd2daf02e244cb9ffbf5bb6a0e724e Mon Sep 17 00:00:00 2001 > From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com> > Date: Tue, 21 Oct 2014 13:20:22 -0500 > Subject: [PATCH 1/1] perf/powerpc: Cache DWARF debug info Jiri, isn't his related to that dsos caching thing you created? Anyway, there is a per machine struct (struct machine) where things like this should be put, please do not add globals or static variables in functions. - Arnaldo > Cache the DWARF debug info for DSO so we don't have to rebuild it for > each address in the DSO (duh!). > > $ time /tmp/perf.orig report -g > /tmp/report.1 > > real 0m1.845s > user 0m0.963s > sys 0m0.879s > > $ time /tmp/perf report -g > /tmp/report.2 > > real 0m0.089s > user 0m0.082s > sys 0m0.006s > > $ diff /tmp/report.1 /tmp/report.2 > $ > > Reported-by: Anton Blanchard <anton@samba.org> > Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com> > --- > tools/perf/arch/powerpc/util/skip-callchain-idx.c | 91 ++++++++++++++++++++--- > 1 file changed, 82 insertions(+), 9 deletions(-) > > diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c > index d73ef8b..bfe254d 100644 > --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c > +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c > @@ -38,6 +38,63 @@ static const Dwfl_Callbacks offline_callbacks = { > .section_address = dwfl_offline_section_address, > }; > > +struct list_head perf_dwfl_files; > + > +struct perf_dwfl_node { > + struct list_head node; > + char *exec_file; > + Dwfl *dwfl; > +}; > + > +static void perf_init_dwfl(void) > +{ > + static int dwfl_inited; > + > + if (!dwfl_inited) { > + dwfl_inited = 1; > + INIT_LIST_HEAD(&perf_dwfl_files); > + } > +} > + > +static Dwfl *perf_dwfl_find(const char *file_name) > +{ > + struct list_head *pos; > + struct perf_dwfl_node *node; > + > + perf_init_dwfl(); > + > + list_for_each(pos, &perf_dwfl_files) { > + node = list_entry(pos, struct perf_dwfl_node, node); > + if (!strcmp(node->exec_file, file_name)) > + return node->dwfl; > + } > + > + return NULL; > +} > + > + > +/* > + * Return 1 if were able to cache the DWARF debug info. 0 otherwise. > + */ > +static int perf_dwfl_cache(const char *file_name, Dwfl *dwfl) > +{ > + struct perf_dwfl_node *dwfl_node; > + > + dwfl_node = malloc(sizeof(struct perf_dwfl_node)); > + > + if (!dwfl_node) { > + pr_debug("%s(): Unable to alloc memory\n", __func__); > + return 0; > + } > + INIT_LIST_HEAD(&dwfl_node->node); > + dwfl_node->dwfl = dwfl; > + dwfl_node->exec_file = strdup(file_name); > + > + list_add(&dwfl_node->node, &perf_dwfl_files); > + > + return 1; > +} > + > > /* > * Use the DWARF expression for the Call-frame-address and determine > @@ -155,16 +212,26 @@ static int check_return_addr(const char *exec_file, Dwarf_Addr pc) > Dwarf_Addr start = pc; > Dwarf_Addr end = pc; > bool signalp; > + int cached; > > - dwfl = dwfl_begin(&offline_callbacks); > + cached = 1; > + dwfl = perf_dwfl_find(exec_file); > if (!dwfl) { > - pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1)); > - return -1; > - } > - > - if (dwfl_report_offline(dwfl, "", exec_file, -1) == NULL) { > - pr_debug("dwfl_report_offline() failed %s\n", dwarf_errmsg(-1)); > - goto out; > + dwfl = dwfl_begin(&offline_callbacks); > + if (!dwfl) { > + pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1)); > + return -1; > + } > + > + if (dwfl_report_offline(dwfl, "", exec_file, -1) == NULL) { > + pr_debug("dwfl_report_offline() failed %s\n", > + dwarf_errmsg(-1)); > + goto out; > + } > + /* > + * If we fail to alloc memory, we lose the benefit of caching > + */ > + cached = perf_dwfl_cache(exec_file, dwfl); > } > > mod = dwfl_addrmodule(dwfl, pc); > @@ -194,7 +261,13 @@ static int check_return_addr(const char *exec_file, Dwarf_Addr pc) > rc = check_return_reg(ra_regno, frame); > > out: > - dwfl_end(dwfl); > + /* > + * In the unlikely event that we were unable to cache the debug > + * info, release it so we don't leak fds. > + */ > + if (!cached) > + dwfl_end(dwfl); > + > return rc; > } > > -- > 1.8.4.2 ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf/powerpc: Cache the DWARF debug info 2014-10-21 19:08 ` Arnaldo Carvalho de Melo @ 2014-10-22 0:09 ` Sukadev Bhattiprolu 2014-10-22 8:17 ` Jiri Olsa 2014-10-30 6:42 ` [tip:perf/core] perf tools powerpc: " tip-bot for Sukadev Bhattiprolu 0 siblings, 2 replies; 12+ messages in thread From: Sukadev Bhattiprolu @ 2014-10-22 0:09 UTC (permalink / raw) To: Arnaldo Carvalho de Melo; +Cc: Jiri Olsa, Anton Blanchard, linux-kernel Arnaldo Carvalho de Melo [acme@kernel.org] wrote: | Em Tue, Oct 21, 2014 at 11:56:10AM -0700, Sukadev Bhattiprolu escreveu: | > >From 773a3608a0cd2daf02e244cb9ffbf5bb6a0e724e Mon Sep 17 00:00:00 2001 | > From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com> | > Date: Tue, 21 Oct 2014 13:20:22 -0500 | > Subject: [PATCH 1/1] perf/powerpc: Cache DWARF debug info | | Jiri, isn't his related to that dsos caching thing you created? | | Anyway, there is a per machine struct (struct machine) where things like | this should be put, please do not add globals or static variables in | functions. Good point. How about something like this: --- Cache the DWARF debug info for DSO so we don't have to rebuild it for each address in the DSO. Note that dso__new() uses calloc() so don't need to set dso->dwfl to NULL. $ /tmp/perf.orig --version perf version 3.18.rc1.gc2661b8 $ /tmp/perf.new --version perf version 3.18.rc1.g402d62 $ perf stat -e cycles,instructions /tmp/perf.orig report -g > orig Performance counter stats for '/tmp/perf.orig report -g': 6,428,177,183 cycles # 0.000 GHz 4,176,288,391 instructions # 0.65 insns per cycle 1.840666132 seconds time elapsed $ perf stat -e cycles,instructions /tmp/perf.new report -g > new Performance counter stats for '/tmp/perf.new report -g': 305,773,142 cycles # 0.000 GHz 276,048,272 instructions # 0.90 insns per cycle 0.087693543 seconds time elapsed $ diff orig new $ Changelog[v2]: [Arnaldo Carvalho] Cache in existing global objects rather than create new static/globals in functions. Reported-by: Anton Blanchard <anton@samba.org> Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com> --- tools/perf/arch/powerpc/util/skip-callchain-idx.c | 33 +++++++++++++++-------- tools/perf/util/dso.h | 1 + 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index d73ef8b..9892b0f 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -145,7 +145,7 @@ static Dwarf_Frame *get_dwarf_frame(Dwfl_Module *mod, Dwarf_Addr pc) * yet used) * -1 in case of errors */ -static int check_return_addr(const char *exec_file, Dwarf_Addr pc) +static int check_return_addr(struct dso *dso, Dwarf_Addr pc) { int rc = -1; Dwfl *dwfl; @@ -156,15 +156,27 @@ static int check_return_addr(const char *exec_file, Dwarf_Addr pc) Dwarf_Addr end = pc; bool signalp; - dwfl = dwfl_begin(&offline_callbacks); - if (!dwfl) { - pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1)); - return -1; - } + dwfl = dso->dwfl; - if (dwfl_report_offline(dwfl, "", exec_file, -1) == NULL) { - pr_debug("dwfl_report_offline() failed %s\n", dwarf_errmsg(-1)); - goto out; + if (!dwfl) { + dwfl = dwfl_begin(&offline_callbacks); + if (!dwfl) { + pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1)); + return -1; + } + + if (dwfl_report_offline(dwfl, "", dso->long_name, -1) == NULL) { + pr_debug("dwfl_report_offline() failed %s\n", + dwarf_errmsg(-1)); + /* + * We normally cache the DWARF debug info and never + * call dwfl_end(). But to prevent fd leak, free in + * case of error. + */ + dwfl_end(dwfl); + goto out; + } + dso->dwfl = dwfl; } mod = dwfl_addrmodule(dwfl, pc); @@ -194,7 +206,6 @@ static int check_return_addr(const char *exec_file, Dwarf_Addr pc) rc = check_return_reg(ra_regno, frame); out: - dwfl_end(dwfl); return rc; } @@ -246,7 +257,7 @@ int arch_skip_callchain_idx(struct machine *machine, struct thread *thread, return skip_slot; } - rc = check_return_addr(dso->long_name, ip); + rc = check_return_addr(dso, ip); pr_debug("DSO %s, nr %" PRIx64 ", ip 0x%" PRIx64 "rc %d\n", dso->long_name, chain->nr, ip, rc); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index acb651a..3c9b391 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -127,6 +127,7 @@ struct dso { const char *long_name; u16 long_name_len; u16 short_name_len; + void *dwfl; /* DWARF debug info */ /* dso data file */ struct { ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH] perf/powerpc: Cache the DWARF debug info 2014-10-22 0:09 ` Sukadev Bhattiprolu @ 2014-10-22 8:17 ` Jiri Olsa 2014-10-22 17:46 ` Sukadev Bhattiprolu 2014-10-30 6:42 ` [tip:perf/core] perf tools powerpc: " tip-bot for Sukadev Bhattiprolu 1 sibling, 1 reply; 12+ messages in thread From: Jiri Olsa @ 2014-10-22 8:17 UTC (permalink / raw) To: Sukadev Bhattiprolu Cc: Arnaldo Carvalho de Melo, Anton Blanchard, linux-kernel On Tue, Oct 21, 2014 at 05:09:58PM -0700, Sukadev Bhattiprolu wrote: > Arnaldo Carvalho de Melo [acme@kernel.org] wrote: > | Em Tue, Oct 21, 2014 at 11:56:10AM -0700, Sukadev Bhattiprolu escreveu: > | > >From 773a3608a0cd2daf02e244cb9ffbf5bb6a0e724e Mon Sep 17 00:00:00 2001 > | > From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com> > | > Date: Tue, 21 Oct 2014 13:20:22 -0500 > | > Subject: [PATCH 1/1] perf/powerpc: Cache DWARF debug info > | > | Jiri, isn't his related to that dsos caching thing you created? well, yea.. but Sukadev needs dw handle on top of that > | > | Anyway, there is a per machine struct (struct machine) where things like > | this should be put, please do not add globals or static variables in > | functions. > > Good point. How about something like this: yep, and the code got much shorter ;-) SNIP > --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c > +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c > @@ -145,7 +145,7 @@ static Dwarf_Frame *get_dwarf_frame(Dwfl_Module *mod, Dwarf_Addr pc) > * yet used) > * -1 in case of errors > */ > -static int check_return_addr(const char *exec_file, Dwarf_Addr pc) > +static int check_return_addr(struct dso *dso, Dwarf_Addr pc) > { > int rc = -1; > Dwfl *dwfl; > @@ -156,15 +156,27 @@ static int check_return_addr(const char *exec_file, Dwarf_Addr pc) > Dwarf_Addr end = pc; > bool signalp; > > - dwfl = dwfl_begin(&offline_callbacks); > - if (!dwfl) { > - pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1)); > - return -1; > - } > + dwfl = dso->dwfl; > > - if (dwfl_report_offline(dwfl, "", exec_file, -1) == NULL) { > - pr_debug("dwfl_report_offline() failed %s\n", dwarf_errmsg(-1)); > - goto out; > + if (!dwfl) { > + dwfl = dwfl_begin(&offline_callbacks); > + if (!dwfl) { > + pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1)); > + return -1; > + } > + > + if (dwfl_report_offline(dwfl, "", dso->long_name, -1) == NULL) { > + pr_debug("dwfl_report_offline() failed %s\n", > + dwarf_errmsg(-1)); > + /* > + * We normally cache the DWARF debug info and never > + * call dwfl_end(). But to prevent fd leak, free in > + * case of error. > + */ > + dwfl_end(dwfl); > + goto out; > + } > + dso->dwfl = dwfl; so by this we get powerpc arch code sharing dw handle via dso object, but we have lot of generic code too ;-) could you make this happen for unwind__get_entries.. probably both sharing same generic code I guess thanks, jirka ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf/powerpc: Cache the DWARF debug info 2014-10-22 8:17 ` Jiri Olsa @ 2014-10-22 17:46 ` Sukadev Bhattiprolu 2014-10-23 13:37 ` Arnaldo Carvalho de Melo 0 siblings, 1 reply; 12+ messages in thread From: Sukadev Bhattiprolu @ 2014-10-22 17:46 UTC (permalink / raw) To: Jiri Olsa; +Cc: Arnaldo Carvalho de Melo, Anton Blanchard, linux-kernel Jiri Olsa [jolsa@redhat.com] wrote: | > + goto out; | > + } | > + dso->dwfl = dwfl; | | so by this we get powerpc arch code sharing dw handle via dso object, | but we have lot of generic code too ;-) Well, this applies to powerpc... | | could you make this happen for unwind__get_entries.. probably | both sharing same generic code I guess and unwind_get_entries() applies only to x86 and arm right ? ;-) Or at least thats what the config/Makefile says. I can take a look at unwind_get_entries(), but can you please merge this fix for now, since the current performance is bad? | | thanks, | jirka ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf/powerpc: Cache the DWARF debug info 2014-10-22 17:46 ` Sukadev Bhattiprolu @ 2014-10-23 13:37 ` Arnaldo Carvalho de Melo 2014-10-23 14:12 ` Jiri Olsa 0 siblings, 1 reply; 12+ messages in thread From: Arnaldo Carvalho de Melo @ 2014-10-23 13:37 UTC (permalink / raw) To: Sukadev Bhattiprolu; +Cc: Jiri Olsa, Anton Blanchard, linux-kernel Em Wed, Oct 22, 2014 at 10:46:59AM -0700, Sukadev Bhattiprolu escreveu: > Jiri Olsa [jolsa@redhat.com] wrote: > | > + goto out; > | > + } > | > + dso->dwfl = dwfl; > | > | so by this we get powerpc arch code sharing dw handle via dso object, > | but we have lot of generic code too ;-) > > Well, this applies to powerpc... > > | > | could you make this happen for unwind__get_entries.. probably > | both sharing same generic code I guess > > and unwind_get_entries() applies only to x86 and arm right ? ;-) > Or at least thats what the config/Makefile says. > I can take a look at unwind_get_entries(), but can you please merge > this fix for now, since the current performance is bad? Right, I think the way it is now is a good compromise, i.e. you seem to be using the right place to cache this, this is restricted to powerpc, i.e. if leaks or excessive memory usage happens in workloads with lots of DSOs having dwfl handlers open at the same times happens, it doesn't affect users in other arches. Jiri: do you agree? I'm applying it on my local tree, as I need to change some of those functions, places where machine and thread are being passed are being changed to receive just a thread pointer, since the machine where a thread is can be obtained, after a patch I added on my local tree, can be obtained from thread->mg->machine. - Arnaldo ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf/powerpc: Cache the DWARF debug info 2014-10-23 13:37 ` Arnaldo Carvalho de Melo @ 2014-10-23 14:12 ` Jiri Olsa 2014-10-23 14:26 ` Arnaldo Carvalho de Melo 0 siblings, 1 reply; 12+ messages in thread From: Jiri Olsa @ 2014-10-23 14:12 UTC (permalink / raw) To: Arnaldo Carvalho de Melo Cc: Sukadev Bhattiprolu, Anton Blanchard, linux-kernel On Thu, Oct 23, 2014 at 10:37:24AM -0300, Arnaldo Carvalho de Melo wrote: > Em Wed, Oct 22, 2014 at 10:46:59AM -0700, Sukadev Bhattiprolu escreveu: > > Jiri Olsa [jolsa@redhat.com] wrote: > > | > + goto out; > > | > + } > > | > + dso->dwfl = dwfl; > > | > > | so by this we get powerpc arch code sharing dw handle via dso object, > > | but we have lot of generic code too ;-) > > > > Well, this applies to powerpc... > > > > | > > | could you make this happen for unwind__get_entries.. probably > > | both sharing same generic code I guess > > > > and unwind_get_entries() applies only to x86 and arm right ? ;-) > > Or at least thats what the config/Makefile says. > > > I can take a look at unwind_get_entries(), but can you please merge > > this fix for now, since the current performance is bad? > > Right, I think the way it is now is a good compromise, i.e. you seem to > be using the right place to cache this, this is restricted to powerpc, > i.e. if leaks or excessive memory usage happens in workloads with lots > of DSOs having dwfl handlers open at the same times happens, it doesn't > affect users in other arches. > > Jiri: do you agree? well it's powerpc specific now.. anyway the code in the patch to open the dwfl is generic and should be in in generic place.. like in some extern function that the x86 would call to get the dwfl handle also the current patch leaks the dso->dwfl, which is never freed -> dwfl_end-ed, dwfl_end should be called of in dso__delete I think jirka ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf/powerpc: Cache the DWARF debug info 2014-10-23 14:12 ` Jiri Olsa @ 2014-10-23 14:26 ` Arnaldo Carvalho de Melo 2014-10-23 15:13 ` Jiri Olsa 0 siblings, 1 reply; 12+ messages in thread From: Arnaldo Carvalho de Melo @ 2014-10-23 14:26 UTC (permalink / raw) To: Jiri Olsa; +Cc: Sukadev Bhattiprolu, Anton Blanchard, linux-kernel Em Thu, Oct 23, 2014 at 04:12:13PM +0200, Jiri Olsa escreveu: > On Thu, Oct 23, 2014 at 10:37:24AM -0300, Arnaldo Carvalho de Melo wrote: > > Em Wed, Oct 22, 2014 at 10:46:59AM -0700, Sukadev Bhattiprolu escreveu: > > > Jiri Olsa [jolsa@redhat.com] wrote: > > > | > + goto out; > > > | > + } > > > | > + dso->dwfl = dwfl; > > > | so by this we get powerpc arch code sharing dw handle via dso object, > > > | but we have lot of generic code too ;-) > > > Well, this applies to powerpc... > > > | could you make this happen for unwind__get_entries.. probably > > > | both sharing same generic code I guess > > > and unwind_get_entries() applies only to x86 and arm right ? ;-) > > > Or at least thats what the config/Makefile says. > > > I can take a look at unwind_get_entries(), but can you please merge > > > this fix for now, since the current performance is bad? > > Right, I think the way it is now is a good compromise, i.e. you seem to > > be using the right place to cache this, this is restricted to powerpc, > > i.e. if leaks or excessive memory usage happens in workloads with lots > > of DSOs having dwfl handlers open at the same times happens, it doesn't > > affect users in other arches. > > > > Jiri: do you agree? > > well it's powerpc specific now.. anyway the code in the patch > to open the dwfl is generic and should be in in generic > place.. like in some extern function that the x86 would call > to get the dwfl handle > > also the current patch leaks the dso->dwfl, which is never freed -> dwfl_end-ed, > dwfl_end should be called of in dso__delete I think Yeah, as my comment implies, I guess those are all valid concerns, i.e. the patch needs more work, I was willing to accept it as-is because it would hurt just Sukadev (i.e. powerpc), as he seems to be in a hurry to get the performance improved :-) I will remove it from my tree for now, as in the end what I'm doing doesn't touch those specific functions. But I think this will go on dragging extra work, i.e.: how to limit the number of dwfl handlers used? Should we have just a front end cache like what is done for machine__findnew_thread() (with just the last hit) and perhaps then have a few slots for keeping N dwfl open and when that number is up we check the one with less queries and close it? Jiri, are you doing that on that cache stuff you did? I mean how do you keep this stuff: /* * Global list of open DSOs and the counter. */ static LIST_HEAD(dso__data_open); static long dso__data_open_cnt; Also this should not be global at all, this should be on struct machine, since a DSO that is present on a machine may have the same name as the dso on another machine (two guests, hosts, etc) and thus should not be kept on the same list, etc. So reading a bit more you seem to check rlimit, do LRUing when hitting the limit, etc, that is why I thought about that stuff when Sukadev first posted this patch... Sukadev, all this is in tools/perf/util/dso.c That is why I thought it would be a compromise to put what he did, it would not make the existing situation that much worse, work needs to be done in this area :-\ - Arnaldo ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf/powerpc: Cache the DWARF debug info 2014-10-23 14:26 ` Arnaldo Carvalho de Melo @ 2014-10-23 15:13 ` Jiri Olsa 2014-10-23 15:33 ` Arnaldo Carvalho de Melo 0 siblings, 1 reply; 12+ messages in thread From: Jiri Olsa @ 2014-10-23 15:13 UTC (permalink / raw) To: Arnaldo Carvalho de Melo Cc: Sukadev Bhattiprolu, Anton Blanchard, linux-kernel On Thu, Oct 23, 2014 at 11:26:34AM -0300, Arnaldo Carvalho de Melo wrote: > Em Thu, Oct 23, 2014 at 04:12:13PM +0200, Jiri Olsa escreveu: > > On Thu, Oct 23, 2014 at 10:37:24AM -0300, Arnaldo Carvalho de Melo wrote: > > > Em Wed, Oct 22, 2014 at 10:46:59AM -0700, Sukadev Bhattiprolu escreveu: > > > > Jiri Olsa [jolsa@redhat.com] wrote: > > > > | > + goto out; > > > > | > + } > > > > | > + dso->dwfl = dwfl; > > > > > | so by this we get powerpc arch code sharing dw handle via dso object, > > > > | but we have lot of generic code too ;-) > > > > > Well, this applies to powerpc... > > > > > | could you make this happen for unwind__get_entries.. probably > > > > | both sharing same generic code I guess > > > > > and unwind_get_entries() applies only to x86 and arm right ? ;-) > > > > Or at least thats what the config/Makefile says. > > > > > I can take a look at unwind_get_entries(), but can you please merge > > > > this fix for now, since the current performance is bad? > > > > Right, I think the way it is now is a good compromise, i.e. you seem to > > > be using the right place to cache this, this is restricted to powerpc, > > > i.e. if leaks or excessive memory usage happens in workloads with lots > > > of DSOs having dwfl handlers open at the same times happens, it doesn't > > > affect users in other arches. > > > > > > Jiri: do you agree? > > > > well it's powerpc specific now.. anyway the code in the patch > > to open the dwfl is generic and should be in in generic > > place.. like in some extern function that the x86 would call > > to get the dwfl handle > > > > also the current patch leaks the dso->dwfl, which is never freed -> dwfl_end-ed, > > dwfl_end should be called of in dso__delete I think > > Yeah, as my comment implies, I guess those are all valid concerns, i.e. > the patch needs more work, I was willing to accept it as-is because it > would hurt just Sukadev (i.e. powerpc), as he seems to be in a hurry to > get the performance improved :-) > > I will remove it from my tree for now, as in the end what I'm doing > doesn't touch those specific functions. > > But I think this will go on dragging extra work, i.e.: how to limit the > number of dwfl handlers used? Should we have just a front end cache like > what is done for machine__findnew_thread() (with just the last hit) and > perhaps then have a few slots for keeping N dwfl open and when that > number is up we check the one with less queries and close it? I think this can stay in 'struct dso' which is limited by that code you show below I think we need just add generic functions that allocates/destroys the dwfl handle and lazy allocate&store this handle whenever it's needed and destroy it in dso__delete > > Jiri, are you doing that on that cache stuff you did? I mean how do > you keep this stuff: > > /* > * Global list of open DSOs and the counter. > */ > static LIST_HEAD(dso__data_open); > static long dso__data_open_cnt; > > Also this should not be global at all, this should be on struct machine, > since a DSO that is present on a machine may have the same name as the > dso on another machine (two guests, hosts, etc) and thus should not be > kept on the same list, etc. the justification for this to be static is that dso__data_open_cnt is checked against the rlimit for user and the point is to keep 'some amount' of dso object opened, so we dont waste time by reopening the 'some amount' is currently the half of the allowed limit for user you can check the logic in open_dso and check_data_close functions > So reading a bit more you seem to check rlimit, do LRUing when hitting > the limit, etc, that is why I thought about that stuff when Sukadev > first posted this patch... > > Sukadev, all this is in tools/perf/util/dso.c > > That is why I thought it would be a compromise to put what he did, it > would not make the existing situation that much worse, work needs to be > done in this area :-\ I think we just need to put that libdw handle into dso object as I suggested above jirka ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf/powerpc: Cache the DWARF debug info 2014-10-23 15:13 ` Jiri Olsa @ 2014-10-23 15:33 ` Arnaldo Carvalho de Melo 2014-10-23 15:39 ` Jiri Olsa 0 siblings, 1 reply; 12+ messages in thread From: Arnaldo Carvalho de Melo @ 2014-10-23 15:33 UTC (permalink / raw) To: Jiri Olsa; +Cc: Sukadev Bhattiprolu, Anton Blanchard, linux-kernel Em Thu, Oct 23, 2014 at 05:13:06PM +0200, Jiri Olsa escreveu: > On Thu, Oct 23, 2014 at 11:26:34AM -0300, Arnaldo Carvalho de Melo wrote: > > That is why I thought it would be a compromise to put what he did, it > > would not make the existing situation that much worse, work needs to be > > done in this area :-\ > I think we just need to put that libdw handle into dso object > as I suggested above Isn't it there already? The patch does: +++ b/tools/perf/util/dso.h @@ -127,6 +127,7 @@ struct dso { const char *long_name; u16 long_name_len; u16 short_name_len; + void *dwfl; /* DWARF debug info */ ---------- What you want, on top of that, at a minimum, we somehow limit the number of simultaneously dwfl_begin()'ed DSOs, right? - Arnaldo ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] perf/powerpc: Cache the DWARF debug info 2014-10-23 15:33 ` Arnaldo Carvalho de Melo @ 2014-10-23 15:39 ` Jiri Olsa 0 siblings, 0 replies; 12+ messages in thread From: Jiri Olsa @ 2014-10-23 15:39 UTC (permalink / raw) To: Arnaldo Carvalho de Melo Cc: Sukadev Bhattiprolu, Anton Blanchard, linux-kernel On Thu, Oct 23, 2014 at 12:33:22PM -0300, Arnaldo Carvalho de Melo wrote: > Em Thu, Oct 23, 2014 at 05:13:06PM +0200, Jiri Olsa escreveu: > > On Thu, Oct 23, 2014 at 11:26:34AM -0300, Arnaldo Carvalho de Melo wrote: > > > That is why I thought it would be a compromise to put what he did, it > > > would not make the existing situation that much worse, work needs to be > > > done in this area :-\ > > > I think we just need to put that libdw handle into dso object > > as I suggested above > > Isn't it there already? > > The patch does: > > +++ b/tools/perf/util/dso.h > @@ -127,6 +127,7 @@ struct dso { > const char *long_name; > u16 long_name_len; > u16 short_name_len; > + void *dwfl; /* DWARF debug info */ > > > ---------- > > What you want, on top of that, at a minimum, we somehow limit the number > of simultaneously dwfl_begin()'ed DSOs, right? no, the patch is doing that.. as I wrote: --- I think this can stay in 'struct dso' which is limited by that code you show below (dso__data_open code) I think we need just add generic functions that allocates/destroys the dwfl handle and lazy allocate&store this handle whenever it's needed and destroy it in dso__delete --- so just: 1) generic function to do the lazy allocation of dwlf handle 2) store the handle within dso object 3) destroy it in dso__delete the limitation for opened dso objects number should cover the dwlf limit thanks, jirka ^ permalink raw reply [flat|nested] 12+ messages in thread
* [tip:perf/core] perf tools powerpc: Cache the DWARF debug info 2014-10-22 0:09 ` Sukadev Bhattiprolu 2014-10-22 8:17 ` Jiri Olsa @ 2014-10-30 6:42 ` tip-bot for Sukadev Bhattiprolu 1 sibling, 0 replies; 12+ messages in thread From: tip-bot for Sukadev Bhattiprolu @ 2014-10-30 6:42 UTC (permalink / raw) To: linux-tip-commits Cc: linux-kernel, tglx, jolsa, hpa, mingo, anton, acme, sukadev, anton Commit-ID: 7d073b335edc8d97af730c2e3b83ed6642bd3c27 Gitweb: http://git.kernel.org/tip/7d073b335edc8d97af730c2e3b83ed6642bd3c27 Author: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com> AuthorDate: Tue, 21 Oct 2014 17:09:58 -0700 Committer: Arnaldo Carvalho de Melo <acme@redhat.com> CommitDate: Wed, 29 Oct 2014 10:32:46 -0200 perf tools powerpc: Cache the DWARF debug info Cache the DWARF debug info for DSO so we don't have to rebuild it for each address in the DSO. Note that dso__new() uses calloc() so don't need to set dso->dwfl to NULL. $ /tmp/perf.orig --version perf version 3.18.rc1.gc2661b8 $ /tmp/perf.new --version perf version 3.18.rc1.g402d62 $ perf stat -e cycles,instructions /tmp/perf.orig report -g > orig Performance counter stats for '/tmp/perf.orig report -g': 6,428,177,183 cycles # 0.000 GHz 4,176,288,391 instructions # 0.65 insns per cycle 1.840666132 seconds time elapsed $ perf stat -e cycles,instructions /tmp/perf.new report -g > new Performance counter stats for '/tmp/perf.new report -g': 305,773,142 cycles # 0.000 GHz 276,048,272 instructions # 0.90 insns per cycle 0.087693543 seconds time elapsed $ diff orig new $ Changelog[v2]: [Arnaldo Carvalho] Cache in existing global objects rather than create new static/globals in functions. Reported-by: Anton Blanchard <anton@samba.org> Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com> Cc: Anton Blanchard <anton@au1.ibm.com> Cc: Jiri Olsa <jolsa@redhat.com> Link: http://lkml.kernel.org/r/20141022000958.GB2228@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/arch/powerpc/util/skip-callchain-idx.c | 33 +++++++++++++++-------- tools/perf/util/dso.h | 1 + 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index d73ef8b..9892b0f 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -145,7 +145,7 @@ static Dwarf_Frame *get_dwarf_frame(Dwfl_Module *mod, Dwarf_Addr pc) * yet used) * -1 in case of errors */ -static int check_return_addr(const char *exec_file, Dwarf_Addr pc) +static int check_return_addr(struct dso *dso, Dwarf_Addr pc) { int rc = -1; Dwfl *dwfl; @@ -156,15 +156,27 @@ static int check_return_addr(const char *exec_file, Dwarf_Addr pc) Dwarf_Addr end = pc; bool signalp; - dwfl = dwfl_begin(&offline_callbacks); - if (!dwfl) { - pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1)); - return -1; - } + dwfl = dso->dwfl; - if (dwfl_report_offline(dwfl, "", exec_file, -1) == NULL) { - pr_debug("dwfl_report_offline() failed %s\n", dwarf_errmsg(-1)); - goto out; + if (!dwfl) { + dwfl = dwfl_begin(&offline_callbacks); + if (!dwfl) { + pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1)); + return -1; + } + + if (dwfl_report_offline(dwfl, "", dso->long_name, -1) == NULL) { + pr_debug("dwfl_report_offline() failed %s\n", + dwarf_errmsg(-1)); + /* + * We normally cache the DWARF debug info and never + * call dwfl_end(). But to prevent fd leak, free in + * case of error. + */ + dwfl_end(dwfl); + goto out; + } + dso->dwfl = dwfl; } mod = dwfl_addrmodule(dwfl, pc); @@ -194,7 +206,6 @@ static int check_return_addr(const char *exec_file, Dwarf_Addr pc) rc = check_return_reg(ra_regno, frame); out: - dwfl_end(dwfl); return rc; } @@ -246,7 +257,7 @@ int arch_skip_callchain_idx(struct machine *machine, struct thread *thread, return skip_slot; } - rc = check_return_addr(dso->long_name, ip); + rc = check_return_addr(dso, ip); pr_debug("DSO %s, nr %" PRIx64 ", ip 0x%" PRIx64 "rc %d\n", dso->long_name, chain->nr, ip, rc); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index acb651a..3c9b391 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -127,6 +127,7 @@ struct dso { const char *long_name; u16 long_name_len; u16 short_name_len; + void *dwfl; /* DWARF debug info */ /* dso data file */ struct { ^ permalink raw reply related [flat|nested] 12+ messages in thread
end of thread, other threads:[~2014-10-30 6:42 UTC | newest] Thread overview: 12+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2014-10-21 18:56 [PATCH] perf/powerpc: Cache the DWARF debug info Sukadev Bhattiprolu 2014-10-21 19:08 ` Arnaldo Carvalho de Melo 2014-10-22 0:09 ` Sukadev Bhattiprolu 2014-10-22 8:17 ` Jiri Olsa 2014-10-22 17:46 ` Sukadev Bhattiprolu 2014-10-23 13:37 ` Arnaldo Carvalho de Melo 2014-10-23 14:12 ` Jiri Olsa 2014-10-23 14:26 ` Arnaldo Carvalho de Melo 2014-10-23 15:13 ` Jiri Olsa 2014-10-23 15:33 ` Arnaldo Carvalho de Melo 2014-10-23 15:39 ` Jiri Olsa 2014-10-30 6:42 ` [tip:perf/core] perf tools powerpc: " tip-bot for Sukadev Bhattiprolu
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox