linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] perf hist: Deduplicate cmp/sort/collapse code
  2025-01-08  8:23 [PATCH 0/2] perf hist: Fix bogus profiles when filters are enabled Dmitry Vyukov
@ 2025-01-08  7:36 ` Dmitry Vyukov
  2025-01-09 21:43   ` Namhyung Kim
  2025-01-08  7:36 ` [PATCH 2/2] perf hist: Fix bogus profiles when filters are enabled Dmitry Vyukov
  1 sibling, 1 reply; 9+ messages in thread
From: Dmitry Vyukov @ 2025-01-08  7:36 UTC (permalink / raw)
  To: namhyung, irogers; +Cc: Dmitry Vyukov, linux-perf-users, linux-kernel

Application of cmp/sort/collapse fmt callbacks is duplicated 6 times.
Factor it into a common helper function. NFC.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Ian Rogers <irogers@google.com>
Cc: linux-perf-users@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
 tools/perf/util/hist.c | 104 +++++++++++++++++------------------------
 tools/perf/util/hist.h |   2 -
 2 files changed, 44 insertions(+), 62 deletions(-)

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index fff1345658016..8e4e844425370 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -32,6 +32,9 @@
 #include <linux/time64.h>
 #include <linux/zalloc.h>
 
+static int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
+static int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
+
 static bool hists__filter_entry_by_dso(struct hists *hists,
 				       struct hist_entry *he);
 static bool hists__filter_entry_by_thread(struct hists *hists,
@@ -1292,19 +1295,27 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
 	return err;
 }
 
-int64_t
-hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
+static int64_t
+hist_entry__cmp_impl(struct perf_hpp_list *hpp_list, struct hist_entry *left,
+		     struct hist_entry *right, unsigned long fn_offset,
+		     bool ignore_dynamic, bool ignore_skipped)
 {
+	typedef int64_t (*fn_t)(struct perf_hpp_fmt *, struct hist_entry *, struct hist_entry *);
 	struct hists *hists = left->hists;
 	struct perf_hpp_fmt *fmt;
 	int64_t cmp = 0;
+	fn_t fn;
 
-	hists__for_each_sort_list(hists, fmt) {
-		if (perf_hpp__is_dynamic_entry(fmt) &&
+	perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
+		if (ignore_dynamic && perf_hpp__is_dynamic_entry(fmt) &&
 		    !perf_hpp__defined_dynamic_entry(fmt, hists))
 			continue;
 
-		cmp = fmt->cmp(fmt, left, right);
+		if (ignore_skipped && perf_hpp__should_skip(fmt, hists))
+			continue;
+
+		fn = *(fn_t *)(((char *)fmt) + fn_offset);
+		cmp = fn(fmt, left, right);
 		if (cmp)
 			break;
 	}
@@ -1313,23 +1324,33 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
 }
 
 int64_t
-hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
+hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
 {
-	struct hists *hists = left->hists;
-	struct perf_hpp_fmt *fmt;
-	int64_t cmp = 0;
+	return hist_entry__cmp_impl(left->hists->hpp_list, left, right,
+		offsetof(struct perf_hpp_fmt, cmp), true, false);
+}
 
-	hists__for_each_sort_list(hists, fmt) {
-		if (perf_hpp__is_dynamic_entry(fmt) &&
-		    !perf_hpp__defined_dynamic_entry(fmt, hists))
-			continue;
+static int64_t
+hist_entry__sort(struct hist_entry *left, struct hist_entry *right)
+{
+	return hist_entry__cmp_impl(left->hists->hpp_list, left, right,
+		offsetof(struct perf_hpp_fmt, sort), false, true);
+}
 
-		cmp = fmt->collapse(fmt, left, right);
-		if (cmp)
-			break;
-	}
+int64_t
+hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	return hist_entry__cmp_impl(left->hists->hpp_list, left, right,
+		offsetof(struct perf_hpp_fmt, collapse), true, false);
+}
 
-	return cmp;
+static int64_t
+hist_entry__collapse_hierarchy(struct perf_hpp_list *hpp_list,
+			       struct hist_entry *left,
+			       struct hist_entry *right)
+{
+	return hist_entry__cmp_impl(hpp_list, left, right,
+		offsetof(struct perf_hpp_fmt, collapse), false, false);
 }
 
 void hist_entry__delete(struct hist_entry *he)
@@ -1503,14 +1524,7 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
 	while (*p != NULL) {
 		parent = *p;
 		iter = rb_entry(parent, struct hist_entry, rb_node_in);
-
-		cmp = 0;
-		perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
-			cmp = fmt->collapse(fmt, iter, he);
-			if (cmp)
-				break;
-		}
-
+		cmp = hist_entry__collapse_hierarchy(hpp_list, iter, he);
 		if (!cmp) {
 			he_stat__add_stat(&iter->stat, &he->stat);
 			return iter;
@@ -1730,24 +1744,6 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
 	return 0;
 }
 
-static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
-{
-	struct hists *hists = a->hists;
-	struct perf_hpp_fmt *fmt;
-	int64_t cmp = 0;
-
-	hists__for_each_sort_list(hists, fmt) {
-		if (perf_hpp__should_skip(fmt, a->hists))
-			continue;
-
-		cmp = fmt->sort(fmt, a, b);
-		if (cmp)
-			break;
-	}
-
-	return cmp;
-}
-
 static void hists__reset_filter_stats(struct hists *hists)
 {
 	hists->nr_non_filtered_entries = 0;
@@ -2449,21 +2445,15 @@ static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists,
 	struct rb_node **p;
 	struct rb_node *parent = NULL;
 	struct hist_entry *he;
-	struct perf_hpp_fmt *fmt;
 	bool leftmost = true;
 
 	p = &root->rb_root.rb_node;
 	while (*p != NULL) {
-		int64_t cmp = 0;
+		int64_t cmp;
 
 		parent = *p;
 		he = rb_entry(parent, struct hist_entry, rb_node_in);
-
-		perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
-			cmp = fmt->collapse(fmt, he, pair);
-			if (cmp)
-				break;
-		}
+		cmp = hist_entry__collapse_hierarchy(he->hpp_list, he, pair);
 		if (!cmp)
 			goto out;
 
@@ -2521,16 +2511,10 @@ static struct hist_entry *hists__find_hierarchy_entry(struct rb_root_cached *roo
 
 	while (n) {
 		struct hist_entry *iter;
-		struct perf_hpp_fmt *fmt;
-		int64_t cmp = 0;
+		int64_t cmp;
 
 		iter = rb_entry(n, struct hist_entry, rb_node_in);
-		perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
-			cmp = fmt->collapse(fmt, iter, he);
-			if (cmp)
-				break;
-		}
-
+		cmp = hist_entry__collapse_hierarchy(he->hpp_list, iter, he);
 		if (cmp < 0)
 			n = n->rb_left;
 		else if (cmp > 0)
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 1131056924d9c..6cff03eb043b7 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -342,8 +342,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
 struct perf_hpp;
 struct perf_hpp_fmt;
 
-int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
-int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
 int hist_entry__transaction_len(void);
 int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size,
 			      struct hists *hists);
-- 
2.47.1.613.gc27f4b7a9f-goog


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/2] perf hist: Fix bogus profiles when filters are enabled
  2025-01-08  8:23 [PATCH 0/2] perf hist: Fix bogus profiles when filters are enabled Dmitry Vyukov
  2025-01-08  7:36 ` [PATCH 1/2] perf hist: Deduplicate cmp/sort/collapse code Dmitry Vyukov
@ 2025-01-08  7:36 ` Dmitry Vyukov
  2025-01-09 21:59   ` Namhyung Kim
  1 sibling, 1 reply; 9+ messages in thread
From: Dmitry Vyukov @ 2025-01-08  7:36 UTC (permalink / raw)
  To: namhyung, irogers; +Cc: Dmitry Vyukov, linux-perf-users, linux-kernel

When a filtered column is not present in the sort order, profiles become
arbitrarily broken. Filtered and non-filtered entries are collapsed
together, and the filtered-by field ends up with a random value (either
from a filtered or non-filtered entry). If we end up with filtered
entry/value, then the whole collapsed entry will be filtered out and will
be missing in the profile. If we end up with non-filtered entry/value,
then the overhead value will be wrongly larger (include some subset
of filtered out samples).

This leads to very confusing profiles. The problem is hard to notice,
and if noticed hard to understand. If the filter is for a single value,
then it can be fixed by adding the corresponding field to the sort order
(provided user understood the problem). But if the filter is for multiple
values, it's impossible to fix b/c there is no concept of binary sorting
based on filter predicate (we want to group all non-filtered values in
one bucket, and all filtered values in another).

Examples of affected commands:
perf report --tid=123
perf report --sort overhead,symbol --comm=foo,bar

Fix this by considering filtered status as the highest priority
sort/collapse predicate.

As a side effect this effectively adds a new feature of showing profile
where several lines are combined based on arbitrary filtering predicate.
For example, showing symbols from binaries foo and bar combined together,
but not from other binaries; or showing combined overhead of several
particular threads.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Ian Rogers <irogers@google.com>
Cc: linux-perf-users@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
 tools/perf/util/hist.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 8e4e844425370..b70170d854a0c 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1303,9 +1303,18 @@ hist_entry__cmp_impl(struct perf_hpp_list *hpp_list, struct hist_entry *left,
 	typedef int64_t (*fn_t)(struct perf_hpp_fmt *, struct hist_entry *, struct hist_entry *);
 	struct hists *hists = left->hists;
 	struct perf_hpp_fmt *fmt;
-	int64_t cmp = 0;
+	int64_t cmp;
 	fn_t fn;
 
+	/*
+	 * Never collapse filtered and non-filtered entries.
+	 * Note this is not the same as having an extra (invisible) fmt
+	 * that corresponds to the filtered status.
+	 */
+	cmp = (int64_t)!!left->filtered - (int64_t)!!right->filtered;
+	if (cmp)
+		return cmp;
+
 	perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
 		if (ignore_dynamic && perf_hpp__is_dynamic_entry(fmt) &&
 		    !perf_hpp__defined_dynamic_entry(fmt, hists))
-- 
2.47.1.613.gc27f4b7a9f-goog


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 0/2] perf hist: Fix bogus profiles when filters are enabled
@ 2025-01-08  8:23 Dmitry Vyukov
  2025-01-08  7:36 ` [PATCH 1/2] perf hist: Deduplicate cmp/sort/collapse code Dmitry Vyukov
  2025-01-08  7:36 ` [PATCH 2/2] perf hist: Fix bogus profiles when filters are enabled Dmitry Vyukov
  0 siblings, 2 replies; 9+ messages in thread
From: Dmitry Vyukov @ 2025-01-08  8:23 UTC (permalink / raw)
  To: namhyung, irogers; +Cc: linux-perf-users, linux-kernel, Dmitry Vyukov

Dmitry Vyukov (2):
  perf hist: Deduplicate cmp/sort/collapse code
  perf hist: Fix bogus profiles when filters are enabled

 tools/perf/util/hist.c | 115 +++++++++++++++++++----------------------
 tools/perf/util/hist.h |   2 -
 2 files changed, 54 insertions(+), 63 deletions(-)


base-commit: 09a0fa92e5b45e99cf435b2fbf5ebcf889cf8780
-- 
2.47.1.613.gc27f4b7a9f-goog


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] perf hist: Deduplicate cmp/sort/collapse code
  2025-01-08  7:36 ` [PATCH 1/2] perf hist: Deduplicate cmp/sort/collapse code Dmitry Vyukov
@ 2025-01-09 21:43   ` Namhyung Kim
  2025-01-10 13:01     ` Dmitry Vyukov
  0 siblings, 1 reply; 9+ messages in thread
From: Namhyung Kim @ 2025-01-09 21:43 UTC (permalink / raw)
  To: Dmitry Vyukov; +Cc: irogers, linux-perf-users, linux-kernel

Hello,

On Wed, Jan 08, 2025 at 08:36:53AM +0100, Dmitry Vyukov wrote:
> Application of cmp/sort/collapse fmt callbacks is duplicated 6 times.
> Factor it into a common helper function. NFC.

It's interesting to use offsetof and cast it to functions. :)

> 
> Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> Cc: Namhyung Kim <namhyung@kernel.org>
> Cc: Ian Rogers <irogers@google.com>
> Cc: linux-perf-users@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> ---
>  tools/perf/util/hist.c | 104 +++++++++++++++++------------------------
>  tools/perf/util/hist.h |   2 -
>  2 files changed, 44 insertions(+), 62 deletions(-)
> 
> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> index fff1345658016..8e4e844425370 100644
> --- a/tools/perf/util/hist.c
> +++ b/tools/perf/util/hist.c
> @@ -32,6 +32,9 @@
>  #include <linux/time64.h>
>  #include <linux/zalloc.h>
>  
> +static int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
> +static int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
> +
>  static bool hists__filter_entry_by_dso(struct hists *hists,
>  				       struct hist_entry *he);
>  static bool hists__filter_entry_by_thread(struct hists *hists,
> @@ -1292,19 +1295,27 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
>  	return err;
>  }
>  
> -int64_t
> -hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
> +static int64_t
> +hist_entry__cmp_impl(struct perf_hpp_list *hpp_list, struct hist_entry *left,
> +		     struct hist_entry *right, unsigned long fn_offset,
> +		     bool ignore_dynamic, bool ignore_skipped)
>  {
> +	typedef int64_t (*fn_t)(struct perf_hpp_fmt *, struct hist_entry *, struct hist_entry *);

Probably better to put it in util/hist.h and have each member (cmp, collapse,
sort) use the typedef to make sure they remain the same.


>  	struct hists *hists = left->hists;
>  	struct perf_hpp_fmt *fmt;
>  	int64_t cmp = 0;
> +	fn_t fn;
>  
> -	hists__for_each_sort_list(hists, fmt) {
> -		if (perf_hpp__is_dynamic_entry(fmt) &&
> +	perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
> +		if (ignore_dynamic && perf_hpp__is_dynamic_entry(fmt) &&
>  		    !perf_hpp__defined_dynamic_entry(fmt, hists))
>  			continue;
>  
> -		cmp = fmt->cmp(fmt, left, right);
> +		if (ignore_skipped && perf_hpp__should_skip(fmt, hists))
> +			continue;
> +
> +		fn = *(fn_t *)(((char *)fmt) + fn_offset);

Doesn't just below work?

		fn = (void *)fmt + fn_offset;

Thanks,
Namhyung


> +		cmp = fn(fmt, left, right);
>  		if (cmp)
>  			break;
>  	}
> @@ -1313,23 +1324,33 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
>  }
>  
>  int64_t
> -hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
> +hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
>  {
> -	struct hists *hists = left->hists;
> -	struct perf_hpp_fmt *fmt;
> -	int64_t cmp = 0;
> +	return hist_entry__cmp_impl(left->hists->hpp_list, left, right,
> +		offsetof(struct perf_hpp_fmt, cmp), true, false);
> +}
>  
> -	hists__for_each_sort_list(hists, fmt) {
> -		if (perf_hpp__is_dynamic_entry(fmt) &&
> -		    !perf_hpp__defined_dynamic_entry(fmt, hists))
> -			continue;
> +static int64_t
> +hist_entry__sort(struct hist_entry *left, struct hist_entry *right)
> +{
> +	return hist_entry__cmp_impl(left->hists->hpp_list, left, right,
> +		offsetof(struct perf_hpp_fmt, sort), false, true);
> +}
>  
> -		cmp = fmt->collapse(fmt, left, right);
> -		if (cmp)
> -			break;
> -	}
> +int64_t
> +hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
> +{
> +	return hist_entry__cmp_impl(left->hists->hpp_list, left, right,
> +		offsetof(struct perf_hpp_fmt, collapse), true, false);
> +}
>  
> -	return cmp;
> +static int64_t
> +hist_entry__collapse_hierarchy(struct perf_hpp_list *hpp_list,
> +			       struct hist_entry *left,
> +			       struct hist_entry *right)
> +{
> +	return hist_entry__cmp_impl(hpp_list, left, right,
> +		offsetof(struct perf_hpp_fmt, collapse), false, false);
>  }
>  
>  void hist_entry__delete(struct hist_entry *he)
> @@ -1503,14 +1524,7 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
>  	while (*p != NULL) {
>  		parent = *p;
>  		iter = rb_entry(parent, struct hist_entry, rb_node_in);
> -
> -		cmp = 0;
> -		perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
> -			cmp = fmt->collapse(fmt, iter, he);
> -			if (cmp)
> -				break;
> -		}
> -
> +		cmp = hist_entry__collapse_hierarchy(hpp_list, iter, he);
>  		if (!cmp) {
>  			he_stat__add_stat(&iter->stat, &he->stat);
>  			return iter;
> @@ -1730,24 +1744,6 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
>  	return 0;
>  }
>  
> -static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
> -{
> -	struct hists *hists = a->hists;
> -	struct perf_hpp_fmt *fmt;
> -	int64_t cmp = 0;
> -
> -	hists__for_each_sort_list(hists, fmt) {
> -		if (perf_hpp__should_skip(fmt, a->hists))
> -			continue;
> -
> -		cmp = fmt->sort(fmt, a, b);
> -		if (cmp)
> -			break;
> -	}
> -
> -	return cmp;
> -}
> -
>  static void hists__reset_filter_stats(struct hists *hists)
>  {
>  	hists->nr_non_filtered_entries = 0;
> @@ -2449,21 +2445,15 @@ static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists,
>  	struct rb_node **p;
>  	struct rb_node *parent = NULL;
>  	struct hist_entry *he;
> -	struct perf_hpp_fmt *fmt;
>  	bool leftmost = true;
>  
>  	p = &root->rb_root.rb_node;
>  	while (*p != NULL) {
> -		int64_t cmp = 0;
> +		int64_t cmp;
>  
>  		parent = *p;
>  		he = rb_entry(parent, struct hist_entry, rb_node_in);
> -
> -		perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
> -			cmp = fmt->collapse(fmt, he, pair);
> -			if (cmp)
> -				break;
> -		}
> +		cmp = hist_entry__collapse_hierarchy(he->hpp_list, he, pair);
>  		if (!cmp)
>  			goto out;
>  
> @@ -2521,16 +2511,10 @@ static struct hist_entry *hists__find_hierarchy_entry(struct rb_root_cached *roo
>  
>  	while (n) {
>  		struct hist_entry *iter;
> -		struct perf_hpp_fmt *fmt;
> -		int64_t cmp = 0;
> +		int64_t cmp;
>  
>  		iter = rb_entry(n, struct hist_entry, rb_node_in);
> -		perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
> -			cmp = fmt->collapse(fmt, iter, he);
> -			if (cmp)
> -				break;
> -		}
> -
> +		cmp = hist_entry__collapse_hierarchy(he->hpp_list, iter, he);
>  		if (cmp < 0)
>  			n = n->rb_left;
>  		else if (cmp > 0)
> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> index 1131056924d9c..6cff03eb043b7 100644
> --- a/tools/perf/util/hist.h
> +++ b/tools/perf/util/hist.h
> @@ -342,8 +342,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
>  struct perf_hpp;
>  struct perf_hpp_fmt;
>  
> -int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
> -int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
>  int hist_entry__transaction_len(void);
>  int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size,
>  			      struct hists *hists);
> -- 
> 2.47.1.613.gc27f4b7a9f-goog
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] perf hist: Fix bogus profiles when filters are enabled
  2025-01-08  7:36 ` [PATCH 2/2] perf hist: Fix bogus profiles when filters are enabled Dmitry Vyukov
@ 2025-01-09 21:59   ` Namhyung Kim
  2025-01-10 12:59     ` Dmitry Vyukov
  0 siblings, 1 reply; 9+ messages in thread
From: Namhyung Kim @ 2025-01-09 21:59 UTC (permalink / raw)
  To: Dmitry Vyukov; +Cc: irogers, linux-perf-users, linux-kernel

On Wed, Jan 08, 2025 at 08:36:54AM +0100, Dmitry Vyukov wrote:
> When a filtered column is not present in the sort order, profiles become
> arbitrary broken. Filtered and non-filtered entries are collapsed
> together, and the filtered-by field ends up with a random value (either
> from a filtered or non-filtered entry). If we end up with filtered
> entry/value, then the whole collapsed entry will be filtered out and will
> be missing in the profile. If we end up with non-filtered entry/value,
> then the overhead value will be wrongly larger (include some subset
> of filtered out samples).
> 
> This leads to very confusing profiles. The problem is hard to notice,
> and if noticed hard to understand. If the filter is for a single value,
> then it can be fixed by adding the corresponding field to the sort order
> (provided user understood the problem). But if the filter is for multiple
> values, it's impossible to fix b/c there is no concept of binary sorting
> based on filter predicate (we want to group all non-filtered values in
> one bucket, and all filtered values in another).
> 
> Examples of affected commands:
> perf report --tid=123
> perf report --sort overhead,symbol --comm=foo,bar
> 
> Fix this by considering filtered status as the highest priority
> sort/collapse predicate.

Do you mean when you specify filters not in a sort list?

I guess it's an undefined behavior.. probably better to warn users not
to do that and exit.  The hist entries are built using only the fields
in the sort list.  Filtering by unspecified fields would be meaningless
and won't work well IMHO.

> 
> As a side effect this effectively adds a new feature of showing profile
> where several lines are combined based on arbitrary filtering predicate.
> For example, showing symbols from binaries foo and bar combined together,
> but not from other binaries; or showing combined overhead of several
> particular threads.

I'm not sure I'm following.  I'm seeing this even with your change.

Without filters:

  $ ./perf report -s dso --stdio -q
      98.41%  perf                
       1.11%  [kernel.kallsyms]   
       0.41%  ld-linux-x86-64.so.2
       0.05%  libLLVM-16.so.1     
       0.03%  libc.so.6           
  
  
With filter:

  $ ./perf report -s dso -d '[kernel.kallsyms],libc.so.6' --stdio -q
       1.11%  [kernel.kallsyms]
       0.03%  libc.so.6        

You said you want to have them combined together, right?
But I think the current behavior makes more sense.

Thanks,
Namhyung

> 
> Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> Cc: Namhyung Kim <namhyung@kernel.org>
> Cc: Ian Rogers <irogers@google.com>
> Cc: linux-perf-users@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> ---
>  tools/perf/util/hist.c | 11 ++++++++++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> index 8e4e844425370..b70170d854a0c 100644
> --- a/tools/perf/util/hist.c
> +++ b/tools/perf/util/hist.c
> @@ -1303,9 +1303,18 @@ hist_entry__cmp_impl(struct perf_hpp_list *hpp_list, struct hist_entry *left,
>  	typedef int64_t (*fn_t)(struct perf_hpp_fmt *, struct hist_entry *, struct hist_entry *);
>  	struct hists *hists = left->hists;
>  	struct perf_hpp_fmt *fmt;
> -	int64_t cmp = 0;
> +	int64_t cmp;
>  	fn_t fn;
>  
> +	/*
> +	 * Never collapse filtered and non-filtered entries.
> +	 * Note this is not the same as having an extra (invisible) fmt
> +	 * that corresponds to the filtered status.
> +	 */
> +	cmp = (int64_t)!!left->filtered - (int64_t)!!right->filtered;
> +	if (cmp)
> +		return cmp;
> +
>  	perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
>  		if (ignore_dynamic && perf_hpp__is_dynamic_entry(fmt) &&
>  		    !perf_hpp__defined_dynamic_entry(fmt, hists))
> -- 
> 2.47.1.613.gc27f4b7a9f-goog
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] perf hist: Fix bogus profiles when filters are enabled
  2025-01-09 21:59   ` Namhyung Kim
@ 2025-01-10 12:59     ` Dmitry Vyukov
  2025-01-11  0:29       ` Namhyung Kim
  0 siblings, 1 reply; 9+ messages in thread
From: Dmitry Vyukov @ 2025-01-10 12:59 UTC (permalink / raw)
  To: Namhyung Kim; +Cc: irogers, linux-perf-users, linux-kernel

On Thu, 9 Jan 2025 at 22:59, Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Wed, Jan 08, 2025 at 08:36:54AM +0100, Dmitry Vyukov wrote:
> > When a filtered column is not present in the sort order, profiles become
> > arbitrary broken. Filtered and non-filtered entries are collapsed
> > together, and the filtered-by field ends up with a random value (either
> > from a filtered or non-filtered entry). If we end up with filtered
> > entry/value, then the whole collapsed entry will be filtered out and will
> > be missing in the profile. If we end up with non-filtered entry/value,
> > then the overhead value will be wrongly larger (include some subset
> > of filtered out samples).
> >
> > This leads to very confusing profiles. The problem is hard to notice,
> > and if noticed hard to understand. If the filter is for a single value,
> > then it can be fixed by adding the corresponding field to the sort order
> > (provided user understood the problem). But if the filter is for multiple
> > values, it's impossible to fix b/c there is no concept of binary sorting
> > based on filter predicate (we want to group all non-filtered values in
> > one bucket, and all filtered values in another).
> >
> > Examples of affected commands:
> > perf report --tid=123
> > perf report --sort overhead,symbol --comm=foo,bar
> >
> > Fix this by considering filtered status as the highest priority
> > sort/collapse predicate.
>
> Do you mean when you specify filters not in a sort list?
>
> I guess it's an undefined behavior.. probably better to warn users not
> to do that and exit.  The hist entries are built using only the fields
> in the sort list.  Filtering by unspecified fields would be meaningless
> and won't work well IMHO.

We could warn as well, but I do want this feature for:
https://lore.kernel.org/linux-perf-users/CACT4Y+an1LSY15f9MS_vnbaaeeqMf+k4-Dqqfu-zwcUAHFNk=w@mail.gmail.com/T/#t
I think it may be useful for other cases as well, let's discuss the
feature below.
Why won't it work well? At least in my local manual testing, it worked
well. This does make hist built using filtered fields as well.


> > As a side effect this effectively adds a new feature of showing profile
> > where several lines are combined based on arbitrary filtering predicate.
> > For example, showing symbols from binaries foo and bar combined together,
> > but not from other binaries; or showing combined overhead of several
> > particular threads.
>
> I'm not sure I'm following.  I'm seeing this even with your change.
>
> Without filters:
>
>   $ ./perf report -s dso --stdio -q
>       98.41%  perf
>        1.11%  [kernel.kallsyms]
>        0.41%  ld-linux-x86-64.so.2
>        0.05%  libLLVM-16.so.1
>        0.03%  libc.so.6
>
>
> With filter:
>
>   $ ./perf report -s dso -d '[kernel.kallsyms],libc.so.6' --stdio -q
>        1.11%  [kernel.kallsyms]
>        0.03%  libc.so.6
>
> You said you want to have them combined together, right?
> But I think the current behavior makes more sense.


If you add a field to both sort and filter, then this change does not
affect anything (filtered fields already participate in
sorting/merging). It affects things when the filter field is not in
sort.

Consider the following queries:
"What symbols consume time in this subset of binaries (but not in the rest)?"
Naively, one would try:
perf report --sort=symbol --comm=cc1,ld
And this deceptively works and shows something, but as far as I
understand these are random numbers now.
With this change you can actually do this (you will see things like
malloc/free in both of these binaries, or if one profiles e.g. Go
programs, then there will be lots of common symbols from
runtime/stdlib).

Another example: profile for this particular subset of threads/cpus
(but not for the rest).

And in particular for:
https://lore.kernel.org/linux-perf-users/CACT4Y+an1LSY15f9MS_vnbaaeeqMf+k4-Dqqfu-zwcUAHFNk=w@mail.gmail.com/T/#t
I want to enable: --parallelism=1-8 filter.
Currently you will either see random garbage numbers, or if you add
'parallelism' to sort order, then you will see each symbol/comm
duplicated 8 times for each separate value of parallelism (which is
not useful). But currently you can't see symbol/comm for these 8
parallelism levels combined, but not for the rest.

All-in-all, this looks like a change that defines behavior that's
currently undefined in a useful and intuitive way, without changing
any of the currently defined behaviors.











> Thanks,
> Namhyung
>
> >
> > Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> > Cc: Namhyung Kim <namhyung@kernel.org>
> > Cc: Ian Rogers <irogers@google.com>
> > Cc: linux-perf-users@vger.kernel.org
> > Cc: linux-kernel@vger.kernel.org
> > ---
> >  tools/perf/util/hist.c | 11 ++++++++++-
> >  1 file changed, 10 insertions(+), 1 deletion(-)
> >
> > diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> > index 8e4e844425370..b70170d854a0c 100644
> > --- a/tools/perf/util/hist.c
> > +++ b/tools/perf/util/hist.c
> > @@ -1303,9 +1303,18 @@ hist_entry__cmp_impl(struct perf_hpp_list *hpp_list, struct hist_entry *left,
> >       typedef int64_t (*fn_t)(struct perf_hpp_fmt *, struct hist_entry *, struct hist_entry *);
> >       struct hists *hists = left->hists;
> >       struct perf_hpp_fmt *fmt;
> > -     int64_t cmp = 0;
> > +     int64_t cmp;
> >       fn_t fn;
> >
> > +     /*
> > +      * Never collapse filtered and non-filtered entries.
> > +      * Note this is not the same as having an extra (invisible) fmt
> > +      * that corresponds to the filtered status.
> > +      */
> > +     cmp = (int64_t)!!left->filtered - (int64_t)!!right->filtered;
> > +     if (cmp)
> > +             return cmp;
> > +
> >       perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
> >               if (ignore_dynamic && perf_hpp__is_dynamic_entry(fmt) &&
> >                   !perf_hpp__defined_dynamic_entry(fmt, hists))
> > --
> > 2.47.1.613.gc27f4b7a9f-goog
> >

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] perf hist: Deduplicate cmp/sort/collapse code
  2025-01-09 21:43   ` Namhyung Kim
@ 2025-01-10 13:01     ` Dmitry Vyukov
  2025-01-13 13:36       ` Dmitry Vyukov
  0 siblings, 1 reply; 9+ messages in thread
From: Dmitry Vyukov @ 2025-01-10 13:01 UTC (permalink / raw)
  To: Namhyung Kim; +Cc: irogers, linux-perf-users, linux-kernel

On Thu, 9 Jan 2025 at 22:43, Namhyung Kim <namhyung@kernel.org> wrote:
>
> Hello,
>
> On Wed, Jan 08, 2025 at 08:36:53AM +0100, Dmitry Vyukov wrote:
> > Application of cmp/sort/collapse fmt callbacks is duplicated 6 times.
> > Factor it into a common helper function. NFC.
>
> It's interesting to use offsetof and cast it to functions. :)

Is there a better way to do this?
Intuitively I reached for member pointers, but then realized it's not
a thing in C...


> > Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> > Cc: Namhyung Kim <namhyung@kernel.org>
> > Cc: Ian Rogers <irogers@google.com>
> > Cc: linux-perf-users@vger.kernel.org
> > Cc: linux-kernel@vger.kernel.org
> > ---
> >  tools/perf/util/hist.c | 104 +++++++++++++++++------------------------
> >  tools/perf/util/hist.h |   2 -
> >  2 files changed, 44 insertions(+), 62 deletions(-)
> >
> > diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> > index fff1345658016..8e4e844425370 100644
> > --- a/tools/perf/util/hist.c
> > +++ b/tools/perf/util/hist.c
> > @@ -32,6 +32,9 @@
> >  #include <linux/time64.h>
> >  #include <linux/zalloc.h>
> >
> > +static int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
> > +static int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
> > +
> >  static bool hists__filter_entry_by_dso(struct hists *hists,
> >                                      struct hist_entry *he);
> >  static bool hists__filter_entry_by_thread(struct hists *hists,
> > @@ -1292,19 +1295,27 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
> >       return err;
> >  }
> >
> > -int64_t
> > -hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
> > +static int64_t
> > +hist_entry__cmp_impl(struct perf_hpp_list *hpp_list, struct hist_entry *left,
> > +                  struct hist_entry *right, unsigned long fn_offset,
> > +                  bool ignore_dynamic, bool ignore_skipped)
> >  {
> > +     typedef int64_t (*fn_t)(struct perf_hpp_fmt *, struct hist_entry *, struct hist_entry *);
>
> Probably better to put it in util/hist.h and each members (cmp, collapse,
> sort) use the typedef to make sure they remain in the same.

Noted. But let's decide on the "perf hist: Fix bogus profiles when
filters are enabled" first, that was the motivation for this
refactoring.


> >       struct hists *hists = left->hists;
> >       struct perf_hpp_fmt *fmt;
> >       int64_t cmp = 0;
> > +     fn_t fn;
> >
> > -     hists__for_each_sort_list(hists, fmt) {
> > -             if (perf_hpp__is_dynamic_entry(fmt) &&
> > +     perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
> > +             if (ignore_dynamic && perf_hpp__is_dynamic_entry(fmt) &&
> >                   !perf_hpp__defined_dynamic_entry(fmt, hists))
> >                       continue;
> >
> > -             cmp = fmt->cmp(fmt, left, right);
> > +             if (ignore_skipped && perf_hpp__should_skip(fmt, hists))
> > +                     continue;
> > +
> > +             fn = *(fn_t *)(((char *)fmt) + fn_offset);
>
> Doesn't just below work?
>
>                 fn = (void *)fmt + fn_offset;
>
> Thanks,
> Namhyung
>
>
> > +             cmp = fn(fmt, left, right);
> >               if (cmp)
> >                       break;
> >       }
> > @@ -1313,23 +1324,33 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
> >  }
> >
> >  int64_t
> > -hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
> > +hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
> >  {
> > -     struct hists *hists = left->hists;
> > -     struct perf_hpp_fmt *fmt;
> > -     int64_t cmp = 0;
> > +     return hist_entry__cmp_impl(left->hists->hpp_list, left, right,
> > +             offsetof(struct perf_hpp_fmt, cmp), true, false);
> > +}
> >
> > -     hists__for_each_sort_list(hists, fmt) {
> > -             if (perf_hpp__is_dynamic_entry(fmt) &&
> > -                 !perf_hpp__defined_dynamic_entry(fmt, hists))
> > -                     continue;
> > +static int64_t
> > +hist_entry__sort(struct hist_entry *left, struct hist_entry *right)
> > +{
> > +     return hist_entry__cmp_impl(left->hists->hpp_list, left, right,
> > +             offsetof(struct perf_hpp_fmt, sort), false, true);
> > +}
> >
> > -             cmp = fmt->collapse(fmt, left, right);
> > -             if (cmp)
> > -                     break;
> > -     }
> > +int64_t
> > +hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
> > +{
> > +     return hist_entry__cmp_impl(left->hists->hpp_list, left, right,
> > +             offsetof(struct perf_hpp_fmt, collapse), true, false);
> > +}
> >
> > -     return cmp;
> > +static int64_t
> > +hist_entry__collapse_hierarchy(struct perf_hpp_list *hpp_list,
> > +                            struct hist_entry *left,
> > +                            struct hist_entry *right)
> > +{
> > +     return hist_entry__cmp_impl(hpp_list, left, right,
> > +             offsetof(struct perf_hpp_fmt, collapse), false, false);
> >  }
> >
> >  void hist_entry__delete(struct hist_entry *he)
> > @@ -1503,14 +1524,7 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
> >       while (*p != NULL) {
> >               parent = *p;
> >               iter = rb_entry(parent, struct hist_entry, rb_node_in);
> > -
> > -             cmp = 0;
> > -             perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
> > -                     cmp = fmt->collapse(fmt, iter, he);
> > -                     if (cmp)
> > -                             break;
> > -             }
> > -
> > +             cmp = hist_entry__collapse_hierarchy(hpp_list, iter, he);
> >               if (!cmp) {
> >                       he_stat__add_stat(&iter->stat, &he->stat);
> >                       return iter;
> > @@ -1730,24 +1744,6 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
> >       return 0;
> >  }
> >
> > -static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
> > -{
> > -     struct hists *hists = a->hists;
> > -     struct perf_hpp_fmt *fmt;
> > -     int64_t cmp = 0;
> > -
> > -     hists__for_each_sort_list(hists, fmt) {
> > -             if (perf_hpp__should_skip(fmt, a->hists))
> > -                     continue;
> > -
> > -             cmp = fmt->sort(fmt, a, b);
> > -             if (cmp)
> > -                     break;
> > -     }
> > -
> > -     return cmp;
> > -}
> > -
> >  static void hists__reset_filter_stats(struct hists *hists)
> >  {
> >       hists->nr_non_filtered_entries = 0;
> > @@ -2449,21 +2445,15 @@ static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists,
> >       struct rb_node **p;
> >       struct rb_node *parent = NULL;
> >       struct hist_entry *he;
> > -     struct perf_hpp_fmt *fmt;
> >       bool leftmost = true;
> >
> >       p = &root->rb_root.rb_node;
> >       while (*p != NULL) {
> > -             int64_t cmp = 0;
> > +             int64_t cmp;
> >
> >               parent = *p;
> >               he = rb_entry(parent, struct hist_entry, rb_node_in);
> > -
> > -             perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
> > -                     cmp = fmt->collapse(fmt, he, pair);
> > -                     if (cmp)
> > -                             break;
> > -             }
> > +             cmp = hist_entry__collapse_hierarchy(he->hpp_list, he, pair);
> >               if (!cmp)
> >                       goto out;
> >
> > @@ -2521,16 +2511,10 @@ static struct hist_entry *hists__find_hierarchy_entry(struct rb_root_cached *roo
> >
> >       while (n) {
> >               struct hist_entry *iter;
> > -             struct perf_hpp_fmt *fmt;
> > -             int64_t cmp = 0;
> > +             int64_t cmp;
> >
> >               iter = rb_entry(n, struct hist_entry, rb_node_in);
> > -             perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
> > -                     cmp = fmt->collapse(fmt, iter, he);
> > -                     if (cmp)
> > -                             break;
> > -             }
> > -
> > +             cmp = hist_entry__collapse_hierarchy(he->hpp_list, iter, he);
> >               if (cmp < 0)
> >                       n = n->rb_left;
> >               else if (cmp > 0)
> > diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> > index 1131056924d9c..6cff03eb043b7 100644
> > --- a/tools/perf/util/hist.h
> > +++ b/tools/perf/util/hist.h
> > @@ -342,8 +342,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
> >  struct perf_hpp;
> >  struct perf_hpp_fmt;
> >
> > -int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
> > -int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
> >  int hist_entry__transaction_len(void);
> >  int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size,
> >                             struct hists *hists);
> > --
> > 2.47.1.613.gc27f4b7a9f-goog
> >

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] perf hist: Fix bogus profiles when filters are enabled
  2025-01-10 12:59     ` Dmitry Vyukov
@ 2025-01-11  0:29       ` Namhyung Kim
  0 siblings, 0 replies; 9+ messages in thread
From: Namhyung Kim @ 2025-01-11  0:29 UTC (permalink / raw)
  To: Dmitry Vyukov; +Cc: irogers, linux-perf-users, linux-kernel

On Fri, Jan 10, 2025 at 01:59:54PM +0100, Dmitry Vyukov wrote:
> On Thu, 9 Jan 2025 at 22:59, Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > On Wed, Jan 08, 2025 at 08:36:54AM +0100, Dmitry Vyukov wrote:
> > > When a filtered column is not present in the sort order, profiles become
> > > arbitrary broken. Filtered and non-filtered entries are collapsed
> > > together, and the filtered-by field ends up with a random value (either
> > > from a filtered or non-filtered entry). If we end up with filtered
> > > entry/value, then the whole collapsed entry will be filtered out and will
> > > be missing in the profile. If we end up with non-filtered entry/value,
> > > then the overhead value will be wrongly larger (include some subset
> > > of filtered out samples).
> > >
> > > This leads to very confusing profiles. The problem is hard to notice,
> > > and if noticed hard to understand. If the filter is for a single value,
> > > then it can be fixed by adding the corresponding field to the sort order
> > > (provided user understood the problem). But if the filter is for multiple
> > > values, it's impossible to fix b/c there is no concept of binary sorting
> > > based on filter predicate (we want to group all non-filtered values in
> > > one bucket, and all filtered values in another).
> > >
> > > Examples of affected commands:
> > > perf report --tid=123
> > > perf report --sort overhead,symbol --comm=foo,bar
> > >
> > > Fix this by considering filtered status as the highest priority
> > > sort/collapse predicate.
> >
> > Do you mean when you specify filters not in a sort list?
> >
> > I guess it's an undefined behavior.. probably better to warn users not
> > to do that and exit.  The hist entries are built using only the fields
> > in the sort list.  Filtering by unspecified fields would be meaningless
> > and won't work well IMHO.
> 
> We could warn as well, but I do want this feature for:
> https://lore.kernel.org/linux-perf-users/CACT4Y+an1LSY15f9MS_vnbaaeeqMf+k4-Dqqfu-zwcUAHFNk=w@mail.gmail.com/T/#t
> I think it may be useful for other cases as well, let's discuss the
> feature below.
> Why won't it work well? At least in my local manual testing, it worked
> well. This does make hist built using filtered fields as well.

I thought hist entry was constructed only for the fields in the sort
list and then apply the filter on the hist entry.  But looking at the
code, I've realized that the filter is set before the hist entry is
created.  Then it would work..

> 
> 
> > > As a side effect this effectively adds a new feature of showing profile
> > > where several lines are combined based on arbitrary filtering predicate.
> > > For example, showing symbols from binaries foo and bar combined together,
> > > but not from other binaries; or showing combined overhead of several
> > > particular threads.
> >
> > I'm not sure I'm following.  I'm seeing this even with your change.
> >
> > Without filters:
> >
> >   $ ./perf report -s dso --stdio -q
> >       98.41%  perf
> >        1.11%  [kernel.kallsyms]
> >        0.41%  ld-linux-x86-64.so.2
> >        0.05%  libLLVM-16.so.1
> >        0.03%  libc.so.6
> >
> >
> > With filter:
> >
> >   $ ./perf report -s dso -d '[kernel.kallsyms],libc.so.6' --stdio -q
> >        1.11%  [kernel.kallsyms]
> >        0.03%  libc.so.6
> >
> > You said you want to have them combined together, right?
> > But I think the current behavior makes more sense.
> 
> 
> If you add a field to both sort and filter, then this change does not
> affect anything (filtered fields already participate in
> sorting/merging). It affects things when the filter field is not in
> sort.
> 
> Consider the following queries:
> "What symbols consume time in this subset of binaries (but not in the rest)?"
> Naively, one would try:
> perf report --sort=symbol --comm=cc1,ld
> And this deceptively works and shows something, but as far as I
> understand these are random numbers now.

Right, that's what I expected.  But I agree with you that this type of
filtering can be useful and intuitive.


> With this change you can actually do this (you will see things like
> malloc/free in both of these binaries, or if one profiles e.g. Go
> programs, then there will be lots of common symbols from
> runtime/stdlib).

Yep, by not merging the filtered entries you can get the precise profile
of symbols from the specified binaries only.

> 
> Another example: profile for this particular subset of threads/cpus
> (but not for the rest).
> 
> And in particular for:
> https://lore.kernel.org/linux-perf-users/CACT4Y+an1LSY15f9MS_vnbaaeeqMf+k4-Dqqfu-zwcUAHFNk=w@mail.gmail.com/T/#t
> I want to enable: --parallelism=1-8 filter.
> Currently you will either see random garbage numbers, or if you add
> 'parallelism' to sort order, then you will see each symbol/comm
> duplicated 8 times for each separate value of parallelism (which is
> not useful). And currently there is no way to see symbol/comm for
> these 8 parallelism levels combined while excluding the rest.

Interesting, I'll take a look at it later.

> 
> All-in-all, this looks like a change that defines behavior that's
> currently undefined in a useful and intuitive way, without changing
> any of the currently defined behaviors.

Makes sense.

Thanks,
Namhyung


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] perf hist: Deduplicate cmp/sort/collapse code
  2025-01-10 13:01     ` Dmitry Vyukov
@ 2025-01-13 13:36       ` Dmitry Vyukov
  0 siblings, 0 replies; 9+ messages in thread
From: Dmitry Vyukov @ 2025-01-13 13:36 UTC (permalink / raw)
  To: Namhyung Kim; +Cc: irogers, linux-perf-users, linux-kernel

On Fri, 10 Jan 2025 at 14:01, Dmitry Vyukov <dvyukov@google.com> wrote:
>
> On Thu, 9 Jan 2025 at 22:43, Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > Hello,
> >
> > On Wed, Jan 08, 2025 at 08:36:53AM +0100, Dmitry Vyukov wrote:
> > > Application of cmp/sort/collapse fmt callbacks is duplicated 6 times.
> > > Factor it into a common helper function. NFC.
> >
> > It's interesting to use offsetof and cast it to functions. :)
>
> Is there a better way to do this?
> Intuitively I reached for member pointers, but then realized it's not
> a thing in C...
>
>
> > > Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> > > Cc: Namhyung Kim <namhyung@kernel.org>
> > > Cc: Ian Rogers <irogers@google.com>
> > > Cc: linux-perf-users@vger.kernel.org
> > > Cc: linux-kernel@vger.kernel.org
> > > ---
> > >  tools/perf/util/hist.c | 104 +++++++++++++++++------------------------
> > >  tools/perf/util/hist.h |   2 -
> > >  2 files changed, 44 insertions(+), 62 deletions(-)
> > >
> > > diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> > > index fff1345658016..8e4e844425370 100644
> > > --- a/tools/perf/util/hist.c
> > > +++ b/tools/perf/util/hist.c
> > > @@ -32,6 +32,9 @@
> > >  #include <linux/time64.h>
> > >  #include <linux/zalloc.h>
> > >
> > > +static int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
> > > +static int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
> > > +
> > >  static bool hists__filter_entry_by_dso(struct hists *hists,
> > >                                      struct hist_entry *he);
> > >  static bool hists__filter_entry_by_thread(struct hists *hists,
> > > @@ -1292,19 +1295,27 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
> > >       return err;
> > >  }
> > >
> > > -int64_t
> > > -hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
> > > +static int64_t
> > > +hist_entry__cmp_impl(struct perf_hpp_list *hpp_list, struct hist_entry *left,
> > > +                  struct hist_entry *right, unsigned long fn_offset,
> > > +                  bool ignore_dynamic, bool ignore_skipped)
> > >  {
> > > +     typedef int64_t (*fn_t)(struct perf_hpp_fmt *, struct hist_entry *, struct hist_entry *);
> >
> > Probably better to put it in util/hist.h and each members (cmp, collapse,
> > sort) use the typedef to make sure they remain in the same.

Good idea! Done in v2.

> Noted. But let's decide on the "perf hist: Fix bogus profiles when
> filters are enabled" first, that was the motivation for this
> refactoring.
>
>
> > >       struct hists *hists = left->hists;
> > >       struct perf_hpp_fmt *fmt;
> > >       int64_t cmp = 0;
> > > +     fn_t fn;
> > >
> > > -     hists__for_each_sort_list(hists, fmt) {
> > > -             if (perf_hpp__is_dynamic_entry(fmt) &&
> > > +     perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
> > > +             if (ignore_dynamic && perf_hpp__is_dynamic_entry(fmt) &&
> > >                   !perf_hpp__defined_dynamic_entry(fmt, hists))
> > >                       continue;
> > >
> > > -             cmp = fmt->cmp(fmt, left, right);
> > > +             if (ignore_skipped && perf_hpp__should_skip(fmt, hists))
> > > +                     continue;
> > > +
> > > +             fn = *(fn_t *)(((char *)fmt) + fn_offset);
> >
> > Doesn't just below work?
> >
> >                 fn = (void *)fmt + fn_offset;

No, this just calculates a pointer to the field that contains the
function pointer. But we also need to dereference that field and get
the function pointer.

> > Thanks,
> > Namhyung
> >
> >
> > > +             cmp = fn(fmt, left, right);
> > >               if (cmp)
> > >                       break;
> > >       }
> > > @@ -1313,23 +1324,33 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
> > >  }
> > >
> > >  int64_t
> > > -hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
> > > +hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
> > >  {
> > > -     struct hists *hists = left->hists;
> > > -     struct perf_hpp_fmt *fmt;
> > > -     int64_t cmp = 0;
> > > +     return hist_entry__cmp_impl(left->hists->hpp_list, left, right,
> > > +             offsetof(struct perf_hpp_fmt, cmp), true, false);
> > > +}
> > >
> > > -     hists__for_each_sort_list(hists, fmt) {
> > > -             if (perf_hpp__is_dynamic_entry(fmt) &&
> > > -                 !perf_hpp__defined_dynamic_entry(fmt, hists))
> > > -                     continue;
> > > +static int64_t
> > > +hist_entry__sort(struct hist_entry *left, struct hist_entry *right)
> > > +{
> > > +     return hist_entry__cmp_impl(left->hists->hpp_list, left, right,
> > > +             offsetof(struct perf_hpp_fmt, sort), false, true);
> > > +}
> > >
> > > -             cmp = fmt->collapse(fmt, left, right);
> > > -             if (cmp)
> > > -                     break;
> > > -     }
> > > +int64_t
> > > +hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
> > > +{
> > > +     return hist_entry__cmp_impl(left->hists->hpp_list, left, right,
> > > +             offsetof(struct perf_hpp_fmt, collapse), true, false);
> > > +}
> > >
> > > -     return cmp;
> > > +static int64_t
> > > +hist_entry__collapse_hierarchy(struct perf_hpp_list *hpp_list,
> > > +                            struct hist_entry *left,
> > > +                            struct hist_entry *right)
> > > +{
> > > +     return hist_entry__cmp_impl(hpp_list, left, right,
> > > +             offsetof(struct perf_hpp_fmt, collapse), false, false);
> > >  }
> > >
> > >  void hist_entry__delete(struct hist_entry *he)
> > > @@ -1503,14 +1524,7 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
> > >       while (*p != NULL) {
> > >               parent = *p;
> > >               iter = rb_entry(parent, struct hist_entry, rb_node_in);
> > > -
> > > -             cmp = 0;
> > > -             perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
> > > -                     cmp = fmt->collapse(fmt, iter, he);
> > > -                     if (cmp)
> > > -                             break;
> > > -             }
> > > -
> > > +             cmp = hist_entry__collapse_hierarchy(hpp_list, iter, he);
> > >               if (!cmp) {
> > >                       he_stat__add_stat(&iter->stat, &he->stat);
> > >                       return iter;
> > > @@ -1730,24 +1744,6 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
> > >       return 0;
> > >  }
> > >
> > > -static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
> > > -{
> > > -     struct hists *hists = a->hists;
> > > -     struct perf_hpp_fmt *fmt;
> > > -     int64_t cmp = 0;
> > > -
> > > -     hists__for_each_sort_list(hists, fmt) {
> > > -             if (perf_hpp__should_skip(fmt, a->hists))
> > > -                     continue;
> > > -
> > > -             cmp = fmt->sort(fmt, a, b);
> > > -             if (cmp)
> > > -                     break;
> > > -     }
> > > -
> > > -     return cmp;
> > > -}
> > > -
> > >  static void hists__reset_filter_stats(struct hists *hists)
> > >  {
> > >       hists->nr_non_filtered_entries = 0;
> > > @@ -2449,21 +2445,15 @@ static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists,
> > >       struct rb_node **p;
> > >       struct rb_node *parent = NULL;
> > >       struct hist_entry *he;
> > > -     struct perf_hpp_fmt *fmt;
> > >       bool leftmost = true;
> > >
> > >       p = &root->rb_root.rb_node;
> > >       while (*p != NULL) {
> > > -             int64_t cmp = 0;
> > > +             int64_t cmp;
> > >
> > >               parent = *p;
> > >               he = rb_entry(parent, struct hist_entry, rb_node_in);
> > > -
> > > -             perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
> > > -                     cmp = fmt->collapse(fmt, he, pair);
> > > -                     if (cmp)
> > > -                             break;
> > > -             }
> > > +             cmp = hist_entry__collapse_hierarchy(he->hpp_list, he, pair);
> > >               if (!cmp)
> > >                       goto out;
> > >
> > > @@ -2521,16 +2511,10 @@ static struct hist_entry *hists__find_hierarchy_entry(struct rb_root_cached *roo
> > >
> > >       while (n) {
> > >               struct hist_entry *iter;
> > > -             struct perf_hpp_fmt *fmt;
> > > -             int64_t cmp = 0;
> > > +             int64_t cmp;
> > >
> > >               iter = rb_entry(n, struct hist_entry, rb_node_in);
> > > -             perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
> > > -                     cmp = fmt->collapse(fmt, iter, he);
> > > -                     if (cmp)
> > > -                             break;
> > > -             }
> > > -
> > > +             cmp = hist_entry__collapse_hierarchy(he->hpp_list, iter, he);
> > >               if (cmp < 0)
> > >                       n = n->rb_left;
> > >               else if (cmp > 0)
> > > diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> > > index 1131056924d9c..6cff03eb043b7 100644
> > > --- a/tools/perf/util/hist.h
> > > +++ b/tools/perf/util/hist.h
> > > @@ -342,8 +342,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
> > >  struct perf_hpp;
> > >  struct perf_hpp_fmt;
> > >
> > > -int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
> > > -int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
> > >  int hist_entry__transaction_len(void);
> > >  int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size,
> > >                             struct hists *hists);
> > > --
> > > 2.47.1.613.gc27f4b7a9f-goog
> > >

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2025-01-13 13:36 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-01-08  8:23 [PATCH 0/2] perf hist: Fix bogus profiles when filters are enabled Dmitry Vyukov
2025-01-08  7:36 ` [PATCH 1/2] perf hist: Deduplicate cmp/sort/collapse code Dmitry Vyukov
2025-01-09 21:43   ` Namhyung Kim
2025-01-10 13:01     ` Dmitry Vyukov
2025-01-13 13:36       ` Dmitry Vyukov
2025-01-08  7:36 ` [PATCH 2/2] perf hist: Fix bogus profiles when filters are enabled Dmitry Vyukov
2025-01-09 21:59   ` Namhyung Kim
2025-01-10 12:59     ` Dmitry Vyukov
2025-01-11  0:29       ` Namhyung Kim

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).