* [PATCH 0/2] perf hist: Fix bogus profiles when filters are enabled @ 2025-01-08 8:23 Dmitry Vyukov 2025-01-08 7:36 ` [PATCH 1/2] perf hist: Deduplicate cmp/sort/collapse code Dmitry Vyukov 2025-01-08 7:36 ` [PATCH 2/2] perf hist: Fix bogus profiles when filters are enabled Dmitry Vyukov 0 siblings, 2 replies; 9+ messages in thread From: Dmitry Vyukov @ 2025-01-08 8:23 UTC (permalink / raw) To: namhyung, irogers; +Cc: linux-perf-users, linux-kernel, Dmitry Vyukov Dmitry Vyukov (2): perf hist: Deduplicate cmp/sort/collapse code perf hist: Fix bogus profiles when filters are enabled tools/perf/util/hist.c | 115 +++++++++++++++++++---------------------- tools/perf/util/hist.h | 2 - 2 files changed, 54 insertions(+), 63 deletions(-) base-commit: 09a0fa92e5b45e99cf435b2fbf5ebcf889cf8780 -- 2.47.1.613.gc27f4b7a9f-goog ^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 1/2] perf hist: Deduplicate cmp/sort/collapse code 2025-01-08 8:23 [PATCH 0/2] perf hist: Fix bogus profiles when filters are enabled Dmitry Vyukov @ 2025-01-08 7:36 ` Dmitry Vyukov 2025-01-09 21:43 ` Namhyung Kim 2025-01-08 7:36 ` [PATCH 2/2] perf hist: Fix bogus profiles when filters are enabled Dmitry Vyukov 1 sibling, 1 reply; 9+ messages in thread From: Dmitry Vyukov @ 2025-01-08 7:36 UTC (permalink / raw) To: namhyung, irogers; +Cc: Dmitry Vyukov, linux-perf-users, linux-kernel Application of cmp/sort/collapse fmt callbacks is duplicated 6 times. Factor it into a common helper function. NFC. Signed-off-by: Dmitry Vyukov <dvyukov@google.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Ian Rogers <irogers@google.com> Cc: linux-perf-users@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- tools/perf/util/hist.c | 104 +++++++++++++++++------------------------ tools/perf/util/hist.h | 2 - 2 files changed, 44 insertions(+), 62 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index fff1345658016..8e4e844425370 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -32,6 +32,9 @@ #include <linux/time64.h> #include <linux/zalloc.h> +static int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); +static int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); + static bool hists__filter_entry_by_dso(struct hists *hists, struct hist_entry *he); static bool hists__filter_entry_by_thread(struct hists *hists, @@ -1292,19 +1295,27 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, return err; } -int64_t -hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) +static int64_t +hist_entry__cmp_impl(struct perf_hpp_list *hpp_list, struct hist_entry *left, + struct hist_entry *right, unsigned long fn_offset, + bool ignore_dynamic, bool ignore_skipped) { + typedef int64_t (*fn_t)(struct perf_hpp_fmt *, struct hist_entry *, struct hist_entry *); 
struct hists *hists = left->hists; struct perf_hpp_fmt *fmt; int64_t cmp = 0; + fn_t fn; - hists__for_each_sort_list(hists, fmt) { - if (perf_hpp__is_dynamic_entry(fmt) && + perf_hpp_list__for_each_sort_list(hpp_list, fmt) { + if (ignore_dynamic && perf_hpp__is_dynamic_entry(fmt) && !perf_hpp__defined_dynamic_entry(fmt, hists)) continue; - cmp = fmt->cmp(fmt, left, right); + if (ignore_skipped && perf_hpp__should_skip(fmt, hists)) + continue; + + fn = *(fn_t *)(((char *)fmt) + fn_offset); + cmp = fn(fmt, left, right); if (cmp) break; } @@ -1313,23 +1324,33 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) } int64_t -hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) +hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) { - struct hists *hists = left->hists; - struct perf_hpp_fmt *fmt; - int64_t cmp = 0; + return hist_entry__cmp_impl(left->hists->hpp_list, left, right, + offsetof(struct perf_hpp_fmt, cmp), true, false); +} - hists__for_each_sort_list(hists, fmt) { - if (perf_hpp__is_dynamic_entry(fmt) && - !perf_hpp__defined_dynamic_entry(fmt, hists)) - continue; +static int64_t +hist_entry__sort(struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_impl(left->hists->hpp_list, left, right, + offsetof(struct perf_hpp_fmt, sort), false, true); +} - cmp = fmt->collapse(fmt, left, right); - if (cmp) - break; - } +int64_t +hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_impl(left->hists->hpp_list, left, right, + offsetof(struct perf_hpp_fmt, collapse), true, false); +} - return cmp; +static int64_t +hist_entry__collapse_hierarchy(struct perf_hpp_list *hpp_list, + struct hist_entry *left, + struct hist_entry *right) +{ + return hist_entry__cmp_impl(hpp_list, left, right, + offsetof(struct perf_hpp_fmt, collapse), false, false); } void hist_entry__delete(struct hist_entry *he) @@ -1503,14 +1524,7 @@ static struct hist_entry 
*hierarchy_insert_entry(struct hists *hists, while (*p != NULL) { parent = *p; iter = rb_entry(parent, struct hist_entry, rb_node_in); - - cmp = 0; - perf_hpp_list__for_each_sort_list(hpp_list, fmt) { - cmp = fmt->collapse(fmt, iter, he); - if (cmp) - break; - } - + cmp = hist_entry__collapse_hierarchy(hpp_list, iter, he); if (!cmp) { he_stat__add_stat(&iter->stat, &he->stat); return iter; @@ -1730,24 +1744,6 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) return 0; } -static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b) -{ - struct hists *hists = a->hists; - struct perf_hpp_fmt *fmt; - int64_t cmp = 0; - - hists__for_each_sort_list(hists, fmt) { - if (perf_hpp__should_skip(fmt, a->hists)) - continue; - - cmp = fmt->sort(fmt, a, b); - if (cmp) - break; - } - - return cmp; -} - static void hists__reset_filter_stats(struct hists *hists) { hists->nr_non_filtered_entries = 0; @@ -2449,21 +2445,15 @@ static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists, struct rb_node **p; struct rb_node *parent = NULL; struct hist_entry *he; - struct perf_hpp_fmt *fmt; bool leftmost = true; p = &root->rb_root.rb_node; while (*p != NULL) { - int64_t cmp = 0; + int64_t cmp; parent = *p; he = rb_entry(parent, struct hist_entry, rb_node_in); - - perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { - cmp = fmt->collapse(fmt, he, pair); - if (cmp) - break; - } + cmp = hist_entry__collapse_hierarchy(he->hpp_list, he, pair); if (!cmp) goto out; @@ -2521,16 +2511,10 @@ static struct hist_entry *hists__find_hierarchy_entry(struct rb_root_cached *roo while (n) { struct hist_entry *iter; - struct perf_hpp_fmt *fmt; - int64_t cmp = 0; + int64_t cmp; iter = rb_entry(n, struct hist_entry, rb_node_in); - perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { - cmp = fmt->collapse(fmt, iter, he); - if (cmp) - break; - } - + cmp = hist_entry__collapse_hierarchy(he->hpp_list, iter, he); if (cmp < 0) n = n->rb_left; else if (cmp > 0) 
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 1131056924d9c..6cff03eb043b7 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -342,8 +342,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, struct perf_hpp; struct perf_hpp_fmt; -int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); -int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); int hist_entry__transaction_len(void); int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size, struct hists *hists); -- 2.47.1.613.gc27f4b7a9f-goog ^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] perf hist: Deduplicate cmp/sort/collapse code 2025-01-08 7:36 ` [PATCH 1/2] perf hist: Deduplicate cmp/sort/collapse code Dmitry Vyukov @ 2025-01-09 21:43 ` Namhyung Kim 2025-01-10 13:01 ` Dmitry Vyukov 0 siblings, 1 reply; 9+ messages in thread From: Namhyung Kim @ 2025-01-09 21:43 UTC (permalink / raw) To: Dmitry Vyukov; +Cc: irogers, linux-perf-users, linux-kernel Hello, On Wed, Jan 08, 2025 at 08:36:53AM +0100, Dmitry Vyukov wrote: > Application of cmp/sort/collapse fmt callbacks is duplicated 6 times. > Factor it into a common helper function. NFC. It's interesting to use offsetof and cast it to functions. :) > > Signed-off-by: Dmitry Vyukov <dvyukov@google.com> > Cc: Namhyung Kim <namhyung@kernel.org> > Cc: Ian Rogers <irogers@google.com> > Cc: linux-perf-users@vger.kernel.org > Cc: linux-kernel@vger.kernel.org > --- > tools/perf/util/hist.c | 104 +++++++++++++++++------------------------ > tools/perf/util/hist.h | 2 - > 2 files changed, 44 insertions(+), 62 deletions(-) > > diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c > index fff1345658016..8e4e844425370 100644 > --- a/tools/perf/util/hist.c > +++ b/tools/perf/util/hist.c > @@ -32,6 +32,9 @@ > #include <linux/time64.h> > #include <linux/zalloc.h> > > +static int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); > +static int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); > + > static bool hists__filter_entry_by_dso(struct hists *hists, > struct hist_entry *he); > static bool hists__filter_entry_by_thread(struct hists *hists, > @@ -1292,19 +1295,27 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, > return err; > } > > -int64_t > -hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) > +static int64_t > +hist_entry__cmp_impl(struct perf_hpp_list *hpp_list, struct hist_entry *left, > + struct hist_entry *right, unsigned long fn_offset, > + bool ignore_dynamic, bool 
ignore_skipped) > { > + typedef int64_t (*fn_t)(struct perf_hpp_fmt *, struct hist_entry *, struct hist_entry *); Probably better to put it in util/hist.h and each members (cmp, collapse, sort) use the typedef to make sure they remain in the same. > struct hists *hists = left->hists; > struct perf_hpp_fmt *fmt; > int64_t cmp = 0; > + fn_t fn; > > - hists__for_each_sort_list(hists, fmt) { > - if (perf_hpp__is_dynamic_entry(fmt) && > + perf_hpp_list__for_each_sort_list(hpp_list, fmt) { > + if (ignore_dynamic && perf_hpp__is_dynamic_entry(fmt) && > !perf_hpp__defined_dynamic_entry(fmt, hists)) > continue; > > - cmp = fmt->cmp(fmt, left, right); > + if (ignore_skipped && perf_hpp__should_skip(fmt, hists)) > + continue; > + > + fn = *(fn_t *)(((char *)fmt) + fn_offset); Doesn't just below work? fn = (void *)fmt + fn_offset; Thanks, Namhyung > + cmp = fn(fmt, left, right); > if (cmp) > break; > } > @@ -1313,23 +1324,33 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) > } > > int64_t > -hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) > +hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) > { > - struct hists *hists = left->hists; > - struct perf_hpp_fmt *fmt; > - int64_t cmp = 0; > + return hist_entry__cmp_impl(left->hists->hpp_list, left, right, > + offsetof(struct perf_hpp_fmt, cmp), true, false); > +} > > - hists__for_each_sort_list(hists, fmt) { > - if (perf_hpp__is_dynamic_entry(fmt) && > - !perf_hpp__defined_dynamic_entry(fmt, hists)) > - continue; > +static int64_t > +hist_entry__sort(struct hist_entry *left, struct hist_entry *right) > +{ > + return hist_entry__cmp_impl(left->hists->hpp_list, left, right, > + offsetof(struct perf_hpp_fmt, sort), false, true); > +} > > - cmp = fmt->collapse(fmt, left, right); > - if (cmp) > - break; > - } > +int64_t > +hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) > +{ > + return hist_entry__cmp_impl(left->hists->hpp_list, left, right, > + 
offsetof(struct perf_hpp_fmt, collapse), true, false); > +} > > - return cmp; > +static int64_t > +hist_entry__collapse_hierarchy(struct perf_hpp_list *hpp_list, > + struct hist_entry *left, > + struct hist_entry *right) > +{ > + return hist_entry__cmp_impl(hpp_list, left, right, > + offsetof(struct perf_hpp_fmt, collapse), false, false); > } > > void hist_entry__delete(struct hist_entry *he) > @@ -1503,14 +1524,7 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, > while (*p != NULL) { > parent = *p; > iter = rb_entry(parent, struct hist_entry, rb_node_in); > - > - cmp = 0; > - perf_hpp_list__for_each_sort_list(hpp_list, fmt) { > - cmp = fmt->collapse(fmt, iter, he); > - if (cmp) > - break; > - } > - > + cmp = hist_entry__collapse_hierarchy(hpp_list, iter, he); > if (!cmp) { > he_stat__add_stat(&iter->stat, &he->stat); > return iter; > @@ -1730,24 +1744,6 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) > return 0; > } > > -static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b) > -{ > - struct hists *hists = a->hists; > - struct perf_hpp_fmt *fmt; > - int64_t cmp = 0; > - > - hists__for_each_sort_list(hists, fmt) { > - if (perf_hpp__should_skip(fmt, a->hists)) > - continue; > - > - cmp = fmt->sort(fmt, a, b); > - if (cmp) > - break; > - } > - > - return cmp; > -} > - > static void hists__reset_filter_stats(struct hists *hists) > { > hists->nr_non_filtered_entries = 0; > @@ -2449,21 +2445,15 @@ static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists, > struct rb_node **p; > struct rb_node *parent = NULL; > struct hist_entry *he; > - struct perf_hpp_fmt *fmt; > bool leftmost = true; > > p = &root->rb_root.rb_node; > while (*p != NULL) { > - int64_t cmp = 0; > + int64_t cmp; > > parent = *p; > he = rb_entry(parent, struct hist_entry, rb_node_in); > - > - perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { > - cmp = fmt->collapse(fmt, he, pair); > - if (cmp) > - break; > - } > + 
cmp = hist_entry__collapse_hierarchy(he->hpp_list, he, pair); > if (!cmp) > goto out; > > @@ -2521,16 +2511,10 @@ static struct hist_entry *hists__find_hierarchy_entry(struct rb_root_cached *roo > > while (n) { > struct hist_entry *iter; > - struct perf_hpp_fmt *fmt; > - int64_t cmp = 0; > + int64_t cmp; > > iter = rb_entry(n, struct hist_entry, rb_node_in); > - perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { > - cmp = fmt->collapse(fmt, iter, he); > - if (cmp) > - break; > - } > - > + cmp = hist_entry__collapse_hierarchy(he->hpp_list, iter, he); > if (cmp < 0) > n = n->rb_left; > else if (cmp > 0) > diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h > index 1131056924d9c..6cff03eb043b7 100644 > --- a/tools/perf/util/hist.h > +++ b/tools/perf/util/hist.h > @@ -342,8 +342,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, > struct perf_hpp; > struct perf_hpp_fmt; > > -int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); > -int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); > int hist_entry__transaction_len(void); > int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size, > struct hists *hists); > -- > 2.47.1.613.gc27f4b7a9f-goog > ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] perf hist: Deduplicate cmp/sort/collapse code 2025-01-09 21:43 ` Namhyung Kim @ 2025-01-10 13:01 ` Dmitry Vyukov 2025-01-13 13:36 ` Dmitry Vyukov 0 siblings, 1 reply; 9+ messages in thread From: Dmitry Vyukov @ 2025-01-10 13:01 UTC (permalink / raw) To: Namhyung Kim; +Cc: irogers, linux-perf-users, linux-kernel On Thu, 9 Jan 2025 at 22:43, Namhyung Kim <namhyung@kernel.org> wrote: > > Hello, > > On Wed, Jan 08, 2025 at 08:36:53AM +0100, Dmitry Vyukov wrote: > > Application of cmp/sort/collapse fmt callbacks is duplicated 6 times. > > Factor it into a common helper function. NFC. > > It's interesting to use offsetof and cast it to functions. :) Is there a better way to do this? Intuitively I reached for member pointers, but then realized it's not a thing in C... > > Signed-off-by: Dmitry Vyukov <dvyukov@google.com> > > Cc: Namhyung Kim <namhyung@kernel.org> > > Cc: Ian Rogers <irogers@google.com> > > Cc: linux-perf-users@vger.kernel.org > > Cc: linux-kernel@vger.kernel.org > > --- > > tools/perf/util/hist.c | 104 +++++++++++++++++------------------------ > > tools/perf/util/hist.h | 2 - > > 2 files changed, 44 insertions(+), 62 deletions(-) > > > > diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c > > index fff1345658016..8e4e844425370 100644 > > --- a/tools/perf/util/hist.c > > +++ b/tools/perf/util/hist.c > > @@ -32,6 +32,9 @@ > > #include <linux/time64.h> > > #include <linux/zalloc.h> > > > > +static int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); > > +static int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); > > + > > static bool hists__filter_entry_by_dso(struct hists *hists, > > struct hist_entry *he); > > static bool hists__filter_entry_by_thread(struct hists *hists, > > @@ -1292,19 +1295,27 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, > > return err; > > } > > > > -int64_t > > -hist_entry__cmp(struct hist_entry *left, struct 
hist_entry *right) > > +static int64_t > > +hist_entry__cmp_impl(struct perf_hpp_list *hpp_list, struct hist_entry *left, > > + struct hist_entry *right, unsigned long fn_offset, > > + bool ignore_dynamic, bool ignore_skipped) > > { > > + typedef int64_t (*fn_t)(struct perf_hpp_fmt *, struct hist_entry *, struct hist_entry *); > > Probably better to put it in util/hist.h and each members (cmp, collapse, > sort) use the typedef to make sure they remain in the same. Noted. But let's decide on the "perf hist: Fix bogus profiles when filters are enabled" first, that was the motivation for this refactoring. > > struct hists *hists = left->hists; > > struct perf_hpp_fmt *fmt; > > int64_t cmp = 0; > > + fn_t fn; > > > > - hists__for_each_sort_list(hists, fmt) { > > - if (perf_hpp__is_dynamic_entry(fmt) && > > + perf_hpp_list__for_each_sort_list(hpp_list, fmt) { > > + if (ignore_dynamic && perf_hpp__is_dynamic_entry(fmt) && > > !perf_hpp__defined_dynamic_entry(fmt, hists)) > > continue; > > > > - cmp = fmt->cmp(fmt, left, right); > > + if (ignore_skipped && perf_hpp__should_skip(fmt, hists)) > > + continue; > > + > > + fn = *(fn_t *)(((char *)fmt) + fn_offset); > > Doesn't just below work? 
> > fn = (void *)fmt + fn_offset; > > Thanks, > Namhyung > > > > + cmp = fn(fmt, left, right); > > if (cmp) > > break; > > } > > @@ -1313,23 +1324,33 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) > > } > > > > int64_t > > -hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) > > +hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) > > { > > - struct hists *hists = left->hists; > > - struct perf_hpp_fmt *fmt; > > - int64_t cmp = 0; > > + return hist_entry__cmp_impl(left->hists->hpp_list, left, right, > > + offsetof(struct perf_hpp_fmt, cmp), true, false); > > +} > > > > - hists__for_each_sort_list(hists, fmt) { > > - if (perf_hpp__is_dynamic_entry(fmt) && > > - !perf_hpp__defined_dynamic_entry(fmt, hists)) > > - continue; > > +static int64_t > > +hist_entry__sort(struct hist_entry *left, struct hist_entry *right) > > +{ > > + return hist_entry__cmp_impl(left->hists->hpp_list, left, right, > > + offsetof(struct perf_hpp_fmt, sort), false, true); > > +} > > > > - cmp = fmt->collapse(fmt, left, right); > > - if (cmp) > > - break; > > - } > > +int64_t > > +hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) > > +{ > > + return hist_entry__cmp_impl(left->hists->hpp_list, left, right, > > + offsetof(struct perf_hpp_fmt, collapse), true, false); > > +} > > > > - return cmp; > > +static int64_t > > +hist_entry__collapse_hierarchy(struct perf_hpp_list *hpp_list, > > + struct hist_entry *left, > > + struct hist_entry *right) > > +{ > > + return hist_entry__cmp_impl(hpp_list, left, right, > > + offsetof(struct perf_hpp_fmt, collapse), false, false); > > } > > > > void hist_entry__delete(struct hist_entry *he) > > @@ -1503,14 +1524,7 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, > > while (*p != NULL) { > > parent = *p; > > iter = rb_entry(parent, struct hist_entry, rb_node_in); > > - > > - cmp = 0; > > - perf_hpp_list__for_each_sort_list(hpp_list, fmt) { > > - cmp = 
fmt->collapse(fmt, iter, he); > > - if (cmp) > > - break; > > - } > > - > > + cmp = hist_entry__collapse_hierarchy(hpp_list, iter, he); > > if (!cmp) { > > he_stat__add_stat(&iter->stat, &he->stat); > > return iter; > > @@ -1730,24 +1744,6 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) > > return 0; > > } > > > > -static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b) > > -{ > > - struct hists *hists = a->hists; > > - struct perf_hpp_fmt *fmt; > > - int64_t cmp = 0; > > - > > - hists__for_each_sort_list(hists, fmt) { > > - if (perf_hpp__should_skip(fmt, a->hists)) > > - continue; > > - > > - cmp = fmt->sort(fmt, a, b); > > - if (cmp) > > - break; > > - } > > - > > - return cmp; > > -} > > - > > static void hists__reset_filter_stats(struct hists *hists) > > { > > hists->nr_non_filtered_entries = 0; > > @@ -2449,21 +2445,15 @@ static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists, > > struct rb_node **p; > > struct rb_node *parent = NULL; > > struct hist_entry *he; > > - struct perf_hpp_fmt *fmt; > > bool leftmost = true; > > > > p = &root->rb_root.rb_node; > > while (*p != NULL) { > > - int64_t cmp = 0; > > + int64_t cmp; > > > > parent = *p; > > he = rb_entry(parent, struct hist_entry, rb_node_in); > > - > > - perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { > > - cmp = fmt->collapse(fmt, he, pair); > > - if (cmp) > > - break; > > - } > > + cmp = hist_entry__collapse_hierarchy(he->hpp_list, he, pair); > > if (!cmp) > > goto out; > > > > @@ -2521,16 +2511,10 @@ static struct hist_entry *hists__find_hierarchy_entry(struct rb_root_cached *roo > > > > while (n) { > > struct hist_entry *iter; > > - struct perf_hpp_fmt *fmt; > > - int64_t cmp = 0; > > + int64_t cmp; > > > > iter = rb_entry(n, struct hist_entry, rb_node_in); > > - perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { > > - cmp = fmt->collapse(fmt, iter, he); > > - if (cmp) > > - break; > > - } > > - > > + cmp = 
hist_entry__collapse_hierarchy(he->hpp_list, iter, he); > > if (cmp < 0) > > n = n->rb_left; > > else if (cmp > 0) > > diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h > > index 1131056924d9c..6cff03eb043b7 100644 > > --- a/tools/perf/util/hist.h > > +++ b/tools/perf/util/hist.h > > @@ -342,8 +342,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, > > struct perf_hpp; > > struct perf_hpp_fmt; > > > > -int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); > > -int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); > > int hist_entry__transaction_len(void); > > int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size, > > struct hists *hists); > > -- > > 2.47.1.613.gc27f4b7a9f-goog > > ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] perf hist: Deduplicate cmp/sort/collapse code 2025-01-10 13:01 ` Dmitry Vyukov @ 2025-01-13 13:36 ` Dmitry Vyukov 0 siblings, 0 replies; 9+ messages in thread From: Dmitry Vyukov @ 2025-01-13 13:36 UTC (permalink / raw) To: Namhyung Kim; +Cc: irogers, linux-perf-users, linux-kernel On Fri, 10 Jan 2025 at 14:01, Dmitry Vyukov <dvyukov@google.com> wrote: > > On Thu, 9 Jan 2025 at 22:43, Namhyung Kim <namhyung@kernel.org> wrote: > > > > Hello, > > > > On Wed, Jan 08, 2025 at 08:36:53AM +0100, Dmitry Vyukov wrote: > > > Application of cmp/sort/collapse fmt callbacks is duplicated 6 times. > > > Factor it into a common helper function. NFC. > > > > It's interesting to use offsetof and cast it to functions. :) > > Is there a better way to do this? > Intuitively I reached for member pointers, but then realized it's not > a thing in C... > > > > > Signed-off-by: Dmitry Vyukov <dvyukov@google.com> > > > Cc: Namhyung Kim <namhyung@kernel.org> > > > Cc: Ian Rogers <irogers@google.com> > > > Cc: linux-perf-users@vger.kernel.org > > > Cc: linux-kernel@vger.kernel.org > > > --- > > > tools/perf/util/hist.c | 104 +++++++++++++++++------------------------ > > > tools/perf/util/hist.h | 2 - > > > 2 files changed, 44 insertions(+), 62 deletions(-) > > > > > > diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c > > > index fff1345658016..8e4e844425370 100644 > > > --- a/tools/perf/util/hist.c > > > +++ b/tools/perf/util/hist.c > > > @@ -32,6 +32,9 @@ > > > #include <linux/time64.h> > > > #include <linux/zalloc.h> > > > > > > +static int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); > > > +static int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); > > > + > > > static bool hists__filter_entry_by_dso(struct hists *hists, > > > struct hist_entry *he); > > > static bool hists__filter_entry_by_thread(struct hists *hists, > > > @@ -1292,19 +1295,27 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, 
struct addr_location *al, > > > return err; > > > } > > > > > > -int64_t > > > -hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) > > > +static int64_t > > > +hist_entry__cmp_impl(struct perf_hpp_list *hpp_list, struct hist_entry *left, > > > + struct hist_entry *right, unsigned long fn_offset, > > > + bool ignore_dynamic, bool ignore_skipped) > > > { > > > + typedef int64_t (*fn_t)(struct perf_hpp_fmt *, struct hist_entry *, struct hist_entry *); > > > > Probably better to put it in util/hist.h and each members (cmp, collapse, > > sort) use the typedef to make sure they remain in the same. Good idea!. Done in v2 > Noted. But let's decide on the "perf hist: Fix bogus profiles when > filters are enabled" first, that was the motivation for this > refactoring. > > > > > struct hists *hists = left->hists; > > > struct perf_hpp_fmt *fmt; > > > int64_t cmp = 0; > > > + fn_t fn; > > > > > > - hists__for_each_sort_list(hists, fmt) { > > > - if (perf_hpp__is_dynamic_entry(fmt) && > > > + perf_hpp_list__for_each_sort_list(hpp_list, fmt) { > > > + if (ignore_dynamic && perf_hpp__is_dynamic_entry(fmt) && > > > !perf_hpp__defined_dynamic_entry(fmt, hists)) > > > continue; > > > > > > - cmp = fmt->cmp(fmt, left, right); > > > + if (ignore_skipped && perf_hpp__should_skip(fmt, hists)) > > > + continue; > > > + > > > + fn = *(fn_t *)(((char *)fmt) + fn_offset); > > > > Doesn't just below work? > > > > fn = (void *)fmt + fn_offset; No, this just calculates a pointer to the field that contains the function pointer. But we also need to dereference that field and get the function pointer. 
> > Thanks, > > Namhyung > > > > > > > + cmp = fn(fmt, left, right); > > > if (cmp) > > > break; > > > } > > > @@ -1313,23 +1324,33 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) > > > } > > > > > > int64_t > > > -hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) > > > +hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) > > > { > > > - struct hists *hists = left->hists; > > > - struct perf_hpp_fmt *fmt; > > > - int64_t cmp = 0; > > > + return hist_entry__cmp_impl(left->hists->hpp_list, left, right, > > > + offsetof(struct perf_hpp_fmt, cmp), true, false); > > > +} > > > > > > - hists__for_each_sort_list(hists, fmt) { > > > - if (perf_hpp__is_dynamic_entry(fmt) && > > > - !perf_hpp__defined_dynamic_entry(fmt, hists)) > > > - continue; > > > +static int64_t > > > +hist_entry__sort(struct hist_entry *left, struct hist_entry *right) > > > +{ > > > + return hist_entry__cmp_impl(left->hists->hpp_list, left, right, > > > + offsetof(struct perf_hpp_fmt, sort), false, true); > > > +} > > > > > > - cmp = fmt->collapse(fmt, left, right); > > > - if (cmp) > > > - break; > > > - } > > > +int64_t > > > +hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) > > > +{ > > > + return hist_entry__cmp_impl(left->hists->hpp_list, left, right, > > > + offsetof(struct perf_hpp_fmt, collapse), true, false); > > > +} > > > > > > - return cmp; > > > +static int64_t > > > +hist_entry__collapse_hierarchy(struct perf_hpp_list *hpp_list, > > > + struct hist_entry *left, > > > + struct hist_entry *right) > > > +{ > > > + return hist_entry__cmp_impl(hpp_list, left, right, > > > + offsetof(struct perf_hpp_fmt, collapse), false, false); > > > } > > > > > > void hist_entry__delete(struct hist_entry *he) > > > @@ -1503,14 +1524,7 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, > > > while (*p != NULL) { > > > parent = *p; > > > iter = rb_entry(parent, struct hist_entry, rb_node_in); > > > - > > > - 
cmp = 0; > > > - perf_hpp_list__for_each_sort_list(hpp_list, fmt) { > > > - cmp = fmt->collapse(fmt, iter, he); > > > - if (cmp) > > > - break; > > > - } > > > - > > > + cmp = hist_entry__collapse_hierarchy(hpp_list, iter, he); > > > if (!cmp) { > > > he_stat__add_stat(&iter->stat, &he->stat); > > > return iter; > > > @@ -1730,24 +1744,6 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) > > > return 0; > > > } > > > > > > -static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b) > > > -{ > > > - struct hists *hists = a->hists; > > > - struct perf_hpp_fmt *fmt; > > > - int64_t cmp = 0; > > > - > > > - hists__for_each_sort_list(hists, fmt) { > > > - if (perf_hpp__should_skip(fmt, a->hists)) > > > - continue; > > > - > > > - cmp = fmt->sort(fmt, a, b); > > > - if (cmp) > > > - break; > > > - } > > > - > > > - return cmp; > > > -} > > > - > > > static void hists__reset_filter_stats(struct hists *hists) > > > { > > > hists->nr_non_filtered_entries = 0; > > > @@ -2449,21 +2445,15 @@ static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists, > > > struct rb_node **p; > > > struct rb_node *parent = NULL; > > > struct hist_entry *he; > > > - struct perf_hpp_fmt *fmt; > > > bool leftmost = true; > > > > > > p = &root->rb_root.rb_node; > > > while (*p != NULL) { > > > - int64_t cmp = 0; > > > + int64_t cmp; > > > > > > parent = *p; > > > he = rb_entry(parent, struct hist_entry, rb_node_in); > > > - > > > - perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { > > > - cmp = fmt->collapse(fmt, he, pair); > > > - if (cmp) > > > - break; > > > - } > > > + cmp = hist_entry__collapse_hierarchy(he->hpp_list, he, pair); > > > if (!cmp) > > > goto out; > > > > > > @@ -2521,16 +2511,10 @@ static struct hist_entry *hists__find_hierarchy_entry(struct rb_root_cached *roo > > > > > > while (n) { > > > struct hist_entry *iter; > > > - struct perf_hpp_fmt *fmt; > > > - int64_t cmp = 0; > > > + int64_t cmp; > > > > > > iter = 
rb_entry(n, struct hist_entry, rb_node_in); > > > - perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { > > > - cmp = fmt->collapse(fmt, iter, he); > > > - if (cmp) > > > - break; > > > - } > > > - > > > + cmp = hist_entry__collapse_hierarchy(he->hpp_list, iter, he); > > > if (cmp < 0) > > > n = n->rb_left; > > > else if (cmp > 0) > > > diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h > > > index 1131056924d9c..6cff03eb043b7 100644 > > > --- a/tools/perf/util/hist.h > > > +++ b/tools/perf/util/hist.h > > > @@ -342,8 +342,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, > > > struct perf_hpp; > > > struct perf_hpp_fmt; > > > > > > -int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); > > > -int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); > > > int hist_entry__transaction_len(void); > > > int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size, > > > struct hists *hists); > > > -- > > > 2.47.1.613.gc27f4b7a9f-goog > > > ^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 2/2] perf hist: Fix bogus profiles when filters are enabled 2025-01-08 8:23 [PATCH 0/2] perf hist: Fix bogus profiles when filters are enabled Dmitry Vyukov 2025-01-08 7:36 ` [PATCH 1/2] perf hist: Deduplicate cmp/sort/collapse code Dmitry Vyukov @ 2025-01-08 7:36 ` Dmitry Vyukov 2025-01-09 21:59 ` Namhyung Kim 1 sibling, 1 reply; 9+ messages in thread From: Dmitry Vyukov @ 2025-01-08 7:36 UTC (permalink / raw) To: namhyung, irogers; +Cc: Dmitry Vyukov, linux-perf-users, linux-kernel When a filtered column is not present in the sort order, profiles become arbitrary broken. Filtered and non-filtered entries are collapsed together, and the filtered-by field ends up with a random value (either from a filtered or non-filtered entry). If we end up with filtered entry/value, then the whole collapsed entry will be filtered out and will be missing in the profile. If we end up with non-filtered entry/value, then the overhead value will be wrongly larger (include some subset of filtered out samples). This leads to very confusing profiles. The problem is hard to notice, and if noticed hard to understand. If the filter is for a single value, then it can be fixed by adding the corresponding field to the sort order (provided user understood the problem). But if the filter is for multiple values, it's impossible to fix b/c there is no concept of binary sorting based on filter predicate (we want to group all non-filtered values in one bucket, and all filtered values in another). Examples of affected commands: perf report --tid=123 perf report --sort overhead,symbol --comm=foo,bar Fix this by considering filtered status as the highest priority sort/collapse predicate. As a side effect this effectively adds a new feature of showing profile where several lines are combined based on arbitrary filtering predicate. For example, showing symbols from binaries foo and bar combined together, but not from other binaries; or showing combined overhead of several particular threads. 
Signed-off-by: Dmitry Vyukov <dvyukov@google.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Ian Rogers <irogers@google.com> Cc: linux-perf-users@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- tools/perf/util/hist.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 8e4e844425370..b70170d854a0c 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1303,9 +1303,18 @@ hist_entry__cmp_impl(struct perf_hpp_list *hpp_list, struct hist_entry *left, typedef int64_t (*fn_t)(struct perf_hpp_fmt *, struct hist_entry *, struct hist_entry *); struct hists *hists = left->hists; struct perf_hpp_fmt *fmt; - int64_t cmp = 0; + int64_t cmp; fn_t fn; + /* + * Never collapse filtered and non-filtered entries. + * Note this is not the same as having an extra (invisible) fmt + * that corresponds to the filtered status. + */ + cmp = (int64_t)!!left->filtered - (int64_t)!!right->filtered; + if (cmp) + return cmp; + perf_hpp_list__for_each_sort_list(hpp_list, fmt) { if (ignore_dynamic && perf_hpp__is_dynamic_entry(fmt) && !perf_hpp__defined_dynamic_entry(fmt, hists)) -- 2.47.1.613.gc27f4b7a9f-goog ^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 2/2] perf hist: Fix bogus profiles when filters are enabled 2025-01-08 7:36 ` [PATCH 2/2] perf hist: Fix bogus profiles when filters are enabled Dmitry Vyukov @ 2025-01-09 21:59 ` Namhyung Kim 2025-01-10 12:59 ` Dmitry Vyukov 0 siblings, 1 reply; 9+ messages in thread From: Namhyung Kim @ 2025-01-09 21:59 UTC (permalink / raw) To: Dmitry Vyukov; +Cc: irogers, linux-perf-users, linux-kernel On Wed, Jan 08, 2025 at 08:36:54AM +0100, Dmitry Vyukov wrote: > When a filtered column is not present in the sort order, profiles become > arbitrary broken. Filtered and non-filtered entries are collapsed > together, and the filtered-by field ends up with a random value (either > from a filtered or non-filtered entry). If we end up with filtered > entry/value, then the whole collapsed entry will be filtered out and will > be missing in the profile. If we end up with non-filtered entry/value, > then the overhead value will be wrongly larger (include some subset > of filtered out samples). > > This leads to very confusing profiles. The problem is hard to notice, > and if noticed hard to understand. If the filter is for a single value, > then it can be fixed by adding the corresponding field to the sort order > (provided user understood the problem). But if the filter is for multiple > values, it's impossible to fix b/c there is no concept of binary sorting > based on filter predicate (we want to group all non-filtered values in > one bucket, and all filtered values in another). > > Examples of affected commands: > perf report --tid=123 > perf report --sort overhead,symbol --comm=foo,bar > > Fix this by considering filtered status as the highest priority > sort/collapse predicate. Do you mean when you specify filters not in a sort list? I guess it's an undefined behavior.. probably better to warn users not to do that and exit. The hist entries are built using only the fields in the sort list. 
Filtering by unspecified fields would be meaningless and won't work well IMHO. > > As a side effect this effectively adds a new feature of showing profile > where several lines are combined based on arbitrary filtering predicate. > For example, showing symbols from binaries foo and bar combined together, > but not from other binaries; or showing combined overhead of several > particular threads. I'm not sure I'm following. I'm seeing this even with your change. Without filters: $ ./perf report -s dso --stdio -q 98.41% perf 1.11% [kernel.kallsyms] 0.41% ld-linux-x86-64.so.2 0.05% libLLVM-16.so.1 0.03% libc.so.6 With filter: $ ./perf report -s dso -d '[kernel.kallsyms],libc.so.6' --stdio -q 1.11% [kernel.kallsyms] 0.03% libc.so.6 You said you want to have them combined together, right? But I think the current behavior makes more sense. Thanks, Namhyung > > Signed-off-by: Dmitry Vyukov <dvyukov@google.com> > Cc: Namhyung Kim <namhyung@kernel.org> > Cc: Ian Rogers <irogers@google.com> > Cc: linux-perf-users@vger.kernel.org > Cc: linux-kernel@vger.kernel.org > --- > tools/perf/util/hist.c | 11 ++++++++++- > 1 file changed, 10 insertions(+), 1 deletion(-) > > diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c > index 8e4e844425370..b70170d854a0c 100644 > --- a/tools/perf/util/hist.c > +++ b/tools/perf/util/hist.c > @@ -1303,9 +1303,18 @@ hist_entry__cmp_impl(struct perf_hpp_list *hpp_list, struct hist_entry *left, > typedef int64_t (*fn_t)(struct perf_hpp_fmt *, struct hist_entry *, struct hist_entry *); > struct hists *hists = left->hists; > struct perf_hpp_fmt *fmt; > - int64_t cmp = 0; > + int64_t cmp; > fn_t fn; > > + /* > + * Never collapse filtered and non-filtered entries. > + * Note this is not the same as having an extra (invisible) fmt > + * that corresponds to the filtered status. 
> + */ > + cmp = (int64_t)!!left->filtered - (int64_t)!!right->filtered; > + if (cmp) > + return cmp; > + > perf_hpp_list__for_each_sort_list(hpp_list, fmt) { > if (ignore_dynamic && perf_hpp__is_dynamic_entry(fmt) && > !perf_hpp__defined_dynamic_entry(fmt, hists)) > -- > 2.47.1.613.gc27f4b7a9f-goog > ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/2] perf hist: Fix bogus profiles when filters are enabled 2025-01-09 21:59 ` Namhyung Kim @ 2025-01-10 12:59 ` Dmitry Vyukov 2025-01-11 0:29 ` Namhyung Kim 0 siblings, 1 reply; 9+ messages in thread From: Dmitry Vyukov @ 2025-01-10 12:59 UTC (permalink / raw) To: Namhyung Kim; +Cc: irogers, linux-perf-users, linux-kernel On Thu, 9 Jan 2025 at 22:59, Namhyung Kim <namhyung@kernel.org> wrote: > > On Wed, Jan 08, 2025 at 08:36:54AM +0100, Dmitry Vyukov wrote: > > When a filtered column is not present in the sort order, profiles become > > arbitrary broken. Filtered and non-filtered entries are collapsed > > together, and the filtered-by field ends up with a random value (either > > from a filtered or non-filtered entry). If we end up with filtered > > entry/value, then the whole collapsed entry will be filtered out and will > > be missing in the profile. If we end up with non-filtered entry/value, > > then the overhead value will be wrongly larger (include some subset > > of filtered out samples). > > > > This leads to very confusing profiles. The problem is hard to notice, > > and if noticed hard to understand. If the filter is for a single value, > > then it can be fixed by adding the corresponding field to the sort order > > (provided user understood the problem). But if the filter is for multiple > > values, it's impossible to fix b/c there is no concept of binary sorting > > based on filter predicate (we want to group all non-filtered values in > > one bucket, and all filtered values in another). > > > > Examples of affected commands: > > perf report --tid=123 > > perf report --sort overhead,symbol --comm=foo,bar > > > > Fix this by considering filtered status as the highest priority > > sort/collapse predicate. > > Do you mean when you specify filters not in a sort list? > > I guess it's an undefined behavior.. probably better to warn users not > to do that and exit. The hist entries are built using only the fields > in the sort list. 
Filtering by unspecified fields would be meaningless > and won't work well IMHO. We could warn as well, but I do want this feature for: https://lore.kernel.org/linux-perf-users/CACT4Y+an1LSY15f9MS_vnbaaeeqMf+k4-Dqqfu-zwcUAHFNk=w@mail.gmail.com/T/#t I think it may be useful for other cases as well, let's discuss the feature below. Why won't it work well? At least in my local manual testing, it worked well. This does make hist built using filtered fields as well. > > As a side effect this effectively adds a new feature of showing profile > > where several lines are combined based on arbitrary filtering predicate. > > For example, showing symbols from binaries foo and bar combined together, > > but not from other binaries; or showing combined overhead of several > > particular threads. > > I'm not sure I'm following. I'm seeing this even with your change. > > Without filters: > > $ ./perf report -s dso --stdio -q > 98.41% perf > 1.11% [kernel.kallsyms] > 0.41% ld-linux-x86-64.so.2 > 0.05% libLLVM-16.so.1 > 0.03% libc.so.6 > > > With filter: > > $ ./perf report -s dso -d '[kernel.kallsyms],libc.so.6' --stdio -q > 1.11% [kernel.kallsyms] > 0.03% libc.so.6 > > You said you want to have them combined together, right? > But I think the current behavior makes more sense. If you add a field to both sort and filter, then this change does not affect anything (filtered fields already participate in sorting/merging). It affects things when the filter field is not in sort. Consider the following queries: "What symbols consume time in this subset of binaries (but not in the rest)?" Naively, one would try: perf report --sort=symbol --comm=cc1,ld And this deceptively works and shows something, but as far as I understand these are random numbers now. With this change you can actually do this (you will see things like malloc/free in both of these binaries, or if one profiles e.g. Go programs, then there will be lots of common symbols from runtime/stdlib). 
Another example: profile for this particular subset of threads/cpus (but not for the rest). And in particular for: https://lore.kernel.org/linux-perf-users/CACT4Y+an1LSY15f9MS_vnbaaeeqMf+k4-Dqqfu-zwcUAHFNk=w@mail.gmail.com/T/#t I want to enable: --parallelism=1-8 filter. Currently you will either see random garbage numbers, or if you add 'parallelism' to sort order, then you will see each symbol/comm duplicated 8 times for each separate value of parallelism (which is not useful). But currently you can't see symbol/comm for these 8 parallelism levels combined, but not for the rest. All-in-all, this looks like a change that defines behavior that's currently undefined in a useful and intuitive way, without changing any of the currently defined behaviors. > Thanks, > Namhyung > > > > > Signed-off-by: Dmitry Vyukov <dvyukov@google.com> > > Cc: Namhyung Kim <namhyung@kernel.org> > > Cc: Ian Rogers <irogers@google.com> > > Cc: linux-perf-users@vger.kernel.org > > Cc: linux-kernel@vger.kernel.org > > --- > > tools/perf/util/hist.c | 11 ++++++++++- > > 1 file changed, 10 insertions(+), 1 deletion(-) > > > > diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c > > index 8e4e844425370..b70170d854a0c 100644 > > --- a/tools/perf/util/hist.c > > +++ b/tools/perf/util/hist.c > > @@ -1303,9 +1303,18 @@ hist_entry__cmp_impl(struct perf_hpp_list *hpp_list, struct hist_entry *left, > > typedef int64_t (*fn_t)(struct perf_hpp_fmt *, struct hist_entry *, struct hist_entry *); > > struct hists *hists = left->hists; > > struct perf_hpp_fmt *fmt; > > - int64_t cmp = 0; > > + int64_t cmp; > > fn_t fn; > > > > + /* > > + * Never collapse filtered and non-filtered entries. > > + * Note this is not the same as having an extra (invisible) fmt > > + * that corresponds to the filtered status. 
> > + */ > > + cmp = (int64_t)!!left->filtered - (int64_t)!!right->filtered; > > + if (cmp) > > + return cmp; > > + > > perf_hpp_list__for_each_sort_list(hpp_list, fmt) { > > if (ignore_dynamic && perf_hpp__is_dynamic_entry(fmt) && > > !perf_hpp__defined_dynamic_entry(fmt, hists)) > > -- > > 2.47.1.613.gc27f4b7a9f-goog > > ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/2] perf hist: Fix bogus profiles when filters are enabled 2025-01-10 12:59 ` Dmitry Vyukov @ 2025-01-11 0:29 ` Namhyung Kim 0 siblings, 0 replies; 9+ messages in thread From: Namhyung Kim @ 2025-01-11 0:29 UTC (permalink / raw) To: Dmitry Vyukov; +Cc: irogers, linux-perf-users, linux-kernel On Fri, Jan 10, 2025 at 01:59:54PM +0100, Dmitry Vyukov wrote: > On Thu, 9 Jan 2025 at 22:59, Namhyung Kim <namhyung@kernel.org> wrote: > > > > On Wed, Jan 08, 2025 at 08:36:54AM +0100, Dmitry Vyukov wrote: > > > When a filtered column is not present in the sort order, profiles become > > > arbitrary broken. Filtered and non-filtered entries are collapsed > > > together, and the filtered-by field ends up with a random value (either > > > from a filtered or non-filtered entry). If we end up with filtered > > > entry/value, then the whole collapsed entry will be filtered out and will > > > be missing in the profile. If we end up with non-filtered entry/value, > > > then the overhead value will be wrongly larger (include some subset > > > of filtered out samples). > > > > > > This leads to very confusing profiles. The problem is hard to notice, > > > and if noticed hard to understand. If the filter is for a single value, > > > then it can be fixed by adding the corresponding field to the sort order > > > (provided user understood the problem). But if the filter is for multiple > > > values, it's impossible to fix b/c there is no concept of binary sorting > > > based on filter predicate (we want to group all non-filtered values in > > > one bucket, and all filtered values in another). > > > > > > Examples of affected commands: > > > perf report --tid=123 > > > perf report --sort overhead,symbol --comm=foo,bar > > > > > > Fix this by considering filtered status as the highest priority > > > sort/collapse predicate. > > > > Do you mean when you specify filters not in a sort list? > > > > I guess it's an undefined behavior.. 
probably better to warn users not > > to do that and exit. The hist entries are built using only the fields > > in the sort list. Filtering by unspecified fields would be meaningless > > and won't work well IMHO. > > We could warn as well, but I do want this feature for: > https://lore.kernel.org/linux-perf-users/CACT4Y+an1LSY15f9MS_vnbaaeeqMf+k4-Dqqfu-zwcUAHFNk=w@mail.gmail.com/T/#t > I think it may be useful for other cases as well, let's discuss the > feature below. > Why won't it work well? At least in my local manual testing, it worked > well. This does make hist built using filtered fields as well. I thought hist entry was constructed only for the fields in the sort list and then apply the filter on the hist entry. But looking at the code, I've realized that the filter is set before the hist entry is created. Then it would work.. > > > > > As a side effect this effectively adds a new feature of showing profile > > > where several lines are combined based on arbitrary filtering predicate. > > > For example, showing symbols from binaries foo and bar combined together, > > > but not from other binaries; or showing combined overhead of several > > > particular threads. > > > > I'm not sure I'm following. I'm seeing this even with your change. > > > > Without filters: > > > > $ ./perf report -s dso --stdio -q > > 98.41% perf > > 1.11% [kernel.kallsyms] > > 0.41% ld-linux-x86-64.so.2 > > 0.05% libLLVM-16.so.1 > > 0.03% libc.so.6 > > > > > > With filter: > > > > $ ./perf report -s dso -d '[kernel.kallsyms],libc.so.6' --stdio -q > > 1.11% [kernel.kallsyms] > > 0.03% libc.so.6 > > > > You said you want to have them combined together, right? > > But I think the current behavior makes more sense. > > > If you add a field to both sort and filter, then this change does not > affect anything (filtered fields already participate in > sorting/merging). It affects things when the filter field is not in > sort. 
> > Consider the following queries: > "What symbols consume time in this subset of binaries (but not in the rest)?" > Naively, one would try: > perf report --sort=symbol --comm=cc1,ld > And this deceptively works and shows something, but as far as I > understand these are random numbers now. Right, that's what I expected. But I agree with you that this type of filtering can be useful and intuitive. > With this change you can actually do this (you will see things like > malloc/free in both of these binaries, or if one profiles e.g. Go > programs, then there will be lots of common symbols from > runtime/stdlin). Yep, by not merging the filtered entries you can get the precise profile of symbols from the specified binaries only. > > Another example: profile for this particular subset of threads/cpus > (but not for the rest). > > And in particular for: > https://lore.kernel.org/linux-perf-users/CACT4Y+an1LSY15f9MS_vnbaaeeqMf+k4-Dqqfu-zwcUAHFNk=w@mail.gmail.com/T/#t > I want to enable: --parallelism=1-8 filter. > Currently you will either see random garbage numbers, or if you add > 'parallelism' to sort order, then you will see each symbol/comm > duplicated 8 times for each separate value of parallelism (which is > not useful). But currently you can't see symbol/comm for these 8 > parallelism levels combined, but not for the rest. Interesting, I'll take a look at it later. > > All-in-all, this looks like a change that defines behavior that's > currently undefined in a useful and intuitive way, without changing > any of the currently defined behaviors. Makes sense. Thanks, Namhyung ^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2025-01-13 13:36 UTC | newest] Thread overview: 9+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2025-01-08 8:23 [PATCH 0/2] perf hist: Fix bogus profiles when filters are enabled Dmitry Vyukov 2025-01-08 7:36 ` [PATCH 1/2] perf hist: Deduplicate cmp/sort/collapse code Dmitry Vyukov 2025-01-09 21:43 ` Namhyung Kim 2025-01-10 13:01 ` Dmitry Vyukov 2025-01-13 13:36 ` Dmitry Vyukov 2025-01-08 7:36 ` [PATCH 2/2] perf hist: Fix bogus profiles when filters are enabled Dmitry Vyukov 2025-01-09 21:59 ` Namhyung Kim 2025-01-10 12:59 ` Dmitry Vyukov 2025-01-11 0:29 ` Namhyung Kim
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).