linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/4] perf record: Split --data-mmap option
@ 2025-12-10  2:33 Namhyung Kim
  2025-12-10  2:33 ` [PATCH 2/4] perf report: Enable data-type profiling with -F option too Namhyung Kim
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Namhyung Kim @ 2025-12-10  2:33 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Ian Rogers, James Clark
  Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
	linux-perf-users

Currently -d/--data option controls both PERF_SAMPLE_ADDR bit and
perf_event_attr.mmap_data flag.  Separate them using new --data-mmap
option to support recording only one of them.

For data-type profiling, data MMAP is unnecessary but it wastes a lot
of space in the ring buffer and data file.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/Documentation/perf-record.txt |  8 +++++++-
 tools/perf/builtin-record.c              | 19 +++++++++++++------
 tools/perf/util/evsel.c                  |  5 +++--
 tools/perf/util/record.h                 |  2 ++
 4 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index e8b9aadbbfa50574..c402e74172f6a22d 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -344,7 +344,8 @@ OPTIONS
 
 -d::
 --data::
-	Record the sample virtual addresses.  Implies --sample-mem-info.
+	Record the sample virtual addresses.  Implies --sample-mem-info and
+	--data-mmap.
 
 --phys-data::
 	Record the sample physical addresses.
@@ -861,6 +862,11 @@ filtered through the mask provided by -C option.
 	Prepare BPF filter to be used by regular users.  The action should be
 	either "pin" or "unpin".  The filter can be used after it's pinned.
 
+--data-mmap::
+	Enable recording MMAP events for non-executable mappings.  Basically
+	perf only records executable mappings but data mmaping can be useful
+	when you analyze data access with sample addresses.  So using -d option
+	would enable this unless you specify --no-data-mmap manually.
 
 include::intel-hybrid.txt[]
 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 2584d0d8bc820676..cbfbd9bb10634093 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1881,7 +1881,7 @@ static int record__synthesize_workload(struct record *rec, bool tail)
 						 process_synthesized_event,
 						 &rec->session->machines.host,
 						 needs_mmap,
-						 rec->opts.sample_address);
+						 rec->opts.record_data_mmap);
 	perf_thread_map__put(thread_map);
 	return err;
 }
@@ -2191,7 +2191,7 @@ static int record__synthesize(struct record *rec, bool tail)
 
 		err = __machine__synthesize_threads(machine, tool, &opts->target,
 						    rec->evlist->core.threads,
-						    f, needs_mmap, opts->sample_address,
+						    f, needs_mmap, opts->record_data_mmap,
 						    rec->opts.nr_threads_synthesize);
 	}
 
@@ -3006,8 +3006,9 @@ int record_opts__parse_callchain(struct record_opts *record,
 	ret = parse_callchain_record_opt(arg, callchain);
 	if (!ret) {
 		/* Enable data address sampling for DWARF unwind. */
-		if (callchain->record_mode == CALLCHAIN_DWARF)
-			record->sample_address = true;
+		if (callchain->record_mode == CALLCHAIN_DWARF &&
+		    !record->record_data_mmap_set)
+			record->record_data_mmap = true;
 		callchain_debug(callchain);
 	}
 
@@ -3686,6 +3687,9 @@ static struct option __record_options[] = {
 	OPT_CALLBACK(0, "off-cpu-thresh", &record.opts, "ms",
 		     "Dump off-cpu samples if off-cpu time exceeds this threshold (in milliseconds). (Default: 500ms)",
 		     record__parse_off_cpu_thresh),
+	OPT_BOOLEAN_SET(0, "data-mmap", &record.opts.record_data_mmap,
+			&record.opts.record_data_mmap_set,
+			"Record mmap events for non-executable mappings"),
 	OPT_END()
 };
 
@@ -4249,9 +4253,12 @@ int cmd_record(int argc, const char **argv)
 		goto out_opts;
 	}
 
-	/* For backward compatibility, -d implies --mem-info */
-	if (rec->opts.sample_address)
+	/* For backward compatibility, -d implies --mem-info and --data-mmap */
+	if (rec->opts.sample_address) {
 		rec->opts.sample_data_src = true;
+		if (!rec->opts.record_data_mmap_set)
+			rec->opts.record_data_mmap = true;
+	}
 
 	/*
 	 * Allow aliases to facilitate the lookup of symbols for address
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 9cd706f6279313c2..ec6552a6f667fec6 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1445,10 +1445,11 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
 		attr->inherit_stat = 1;
 	}
 
-	if (opts->sample_address) {
+	if (opts->sample_address)
 		evsel__set_sample_bit(evsel, ADDR);
+
+	if (opts->record_data_mmap)
 		attr->mmap_data = track;
-	}
 
 	/*
 	 * We don't allow user space callchains for  function trace
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index ea3a6c4657eefb74..93627c9a73387ddd 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -40,6 +40,8 @@ struct record_opts {
 	bool	      record_cgroup;
 	bool	      record_switch_events;
 	bool	      record_switch_events_set;
+	bool	      record_data_mmap;
+	bool	      record_data_mmap_set;
 	bool	      all_kernel;
 	bool	      all_user;
 	bool	      kernel_callchains;
-- 
2.52.0.223.gf5cc29aaa4-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/4] perf report: Enable data-type profiling with -F option too
  2025-12-10  2:33 [PATCH 1/4] perf record: Split --data-mmap option Namhyung Kim
@ 2025-12-10  2:33 ` Namhyung Kim
  2025-12-10 22:05   ` Ian Rogers
  2025-12-10  2:33 ` [PATCH 3/4] perf report: Fix histogram entry collapsing for -F option Namhyung Kim
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 8+ messages in thread
From: Namhyung Kim @ 2025-12-10  2:33 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Ian Rogers, James Clark
  Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
	linux-perf-users

It checked -s/--sort options only.  As the sort keys can be setup using
the -F/--fields option as well, it should enable data-type profiling
with it too.

The following two commands should have the same output.

  $ perf report -s type

  $ perf report -F overhead,type

But there's another problem on this.  I'll handle it in the next commit.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/builtin-report.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index add6b1c2aaf04270..6c2b4f93ec78e579 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1727,7 +1727,8 @@ int cmd_report(int argc, const char **argv)
 			sort_order = NULL;
 	}
 
-	if (sort_order && strstr(sort_order, "type")) {
+	if ((sort_order && strstr(sort_order, "type")) ||
+	    (field_order && strstr(field_order, "type"))) {
 		report.data_type = true;
 		annotate_opts.annotate_src = false;
 
-- 
2.52.0.223.gf5cc29aaa4-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/4] perf report: Fix histogram entry collapsing for -F option
  2025-12-10  2:33 [PATCH 1/4] perf record: Split --data-mmap option Namhyung Kim
  2025-12-10  2:33 ` [PATCH 2/4] perf report: Enable data-type profiling with -F option too Namhyung Kim
@ 2025-12-10  2:33 ` Namhyung Kim
  2025-12-10 22:06   ` Ian Rogers
  2025-12-10  2:33 ` [PATCH 4/4] perf report: Update sort key state from " Namhyung Kim
  2025-12-10 22:05 ` [PATCH 1/4] perf record: Split --data-mmap option Ian Rogers
  3 siblings, 1 reply; 8+ messages in thread
From: Namhyung Kim @ 2025-12-10  2:33 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Ian Rogers, James Clark
  Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
	linux-perf-users

Users can use -F/--fields option to set output fields and sort keys
together.  But it missed to set perf_hpp_list->need_collapse for sort
entries that have se_collapse callbacks.  So it ends up with having
duplicated entries separately.

For example, let's run this command first.

  $ perf mem record -t load -U -- perf test -w datasym

This will record samples for memory access (load) to struct 'buf' and a
loop condition ('sig_atomic_t') types.  So the following two commands
should have identical output.

  $ perf report -s type --stdio --percent-limit=1 -q
      87.80%  perf                  buf
      12.17%  perf                  sig_atomic_t

But using -F option didn't collapse the entries based on types so the
result looked like below:

  $ perf report -F overhead,type --stdio --percent-limit=1 -q
      23.31%  perf                  buf
      22.84%  perf                  buf
      21.26%  perf                  buf
      20.39%  perf                  buf
      12.17%  perf                  sig_atomic_t

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/sort.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index a0a5c1c40af0c318..c51604eaae0a4b90 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -3578,6 +3578,9 @@ static int __sort_dimension__add_output(struct perf_hpp_list *list,
 	if (__sort_dimension__add_hpp_output(sd, list, level) < 0)
 		return -1;
 
+	if (sd->entry->se_collapse)
+		list->need_collapse = 1;
+
 	sd->taken = 1;
 	return 0;
 }
-- 
2.52.0.223.gf5cc29aaa4-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 4/4] perf report: Update sort key state from -F option
  2025-12-10  2:33 [PATCH 1/4] perf record: Split --data-mmap option Namhyung Kim
  2025-12-10  2:33 ` [PATCH 2/4] perf report: Enable data-type profiling with -F option too Namhyung Kim
  2025-12-10  2:33 ` [PATCH 3/4] perf report: Fix histogram entry collapsing for -F option Namhyung Kim
@ 2025-12-10  2:33 ` Namhyung Kim
  2025-12-10 22:06   ` Ian Rogers
  2025-12-10 22:05 ` [PATCH 1/4] perf record: Split --data-mmap option Ian Rogers
  3 siblings, 1 reply; 8+ messages in thread
From: Namhyung Kim @ 2025-12-10  2:33 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Ian Rogers, James Clark
  Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
	linux-perf-users

Factor out __sort_dimension__update() so that it can be called from -s
and -F option parsing logics.  Otherwise the following command cannot go
into the annotation mode.

  $ perf report -F overhead,type,sym

  Warning: Annotation is only available for symbolic views, include "sym*" in --sort to use it.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/sort.c | 100 ++++++++++++++++++++++-------------------
 1 file changed, 54 insertions(+), 46 deletions(-)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index c51604eaae0a4b90..5e27a66e3ccb0bbe 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -3531,6 +3531,56 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok,
 	return ret;
 }
 
+static int __sort_dimension__update(struct sort_dimension *sd,
+				    struct perf_hpp_list *list)
+{
+	if (sd->entry == &sort_parent && parent_pattern) {
+		int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
+		if (ret) {
+			char err[BUFSIZ];
+
+			regerror(ret, &parent_regex, err, sizeof(err));
+			pr_err("Invalid regex: %s\n%s", parent_pattern, err);
+			return -EINVAL;
+		}
+		list->parent = 1;
+	} else if (sd->entry == &sort_sym) {
+		list->sym = 1;
+		/*
+		 * perf diff displays the performance difference amongst
+		 * two or more perf.data files. Those files could come
+		 * from different binaries. So we should not compare
+		 * their ips, but the name of symbol.
+		 */
+		if (sort__mode == SORT_MODE__DIFF)
+			sd->entry->se_collapse = sort__sym_sort;
+
+	} else if (sd->entry == &sort_sym_offset) {
+		list->sym = 1;
+	} else if (sd->entry == &sort_dso) {
+		list->dso = 1;
+	} else if (sd->entry == &sort_socket) {
+		list->socket = 1;
+	} else if (sd->entry == &sort_thread) {
+		list->thread = 1;
+	} else if (sd->entry == &sort_comm) {
+		list->comm = 1;
+	} else if (sd->entry == &sort_type_offset) {
+		symbol_conf.annotate_data_member = true;
+	} else if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to) {
+		list->sym = 1;
+	} else if (sd->entry == &sort_mem_dcacheline && cacheline_size() == 0) {
+		return -EINVAL;
+	} else if (sd->entry == &sort_mem_daddr_sym) {
+		list->sym = 1;
+	}
+
+	if (sd->entry->se_collapse)
+		list->need_collapse = 1;
+
+	return 0;
+}
+
 static int __sort_dimension__add(struct sort_dimension *sd,
 				 struct perf_hpp_list *list,
 				 int level)
@@ -3541,8 +3591,8 @@ static int __sort_dimension__add(struct sort_dimension *sd,
 	if (__sort_dimension__add_hpp_sort(sd, list, level) < 0)
 		return -1;
 
-	if (sd->entry->se_collapse)
-		list->need_collapse = 1;
+	if (__sort_dimension__update(sd, list) < 0)
+		return -1;
 
 	sd->taken = 1;
 
@@ -3578,8 +3628,8 @@ static int __sort_dimension__add_output(struct perf_hpp_list *list,
 	if (__sort_dimension__add_hpp_output(sd, list, level) < 0)
 		return -1;
 
-	if (sd->entry->se_collapse)
-		list->need_collapse = 1;
+	if (__sort_dimension__update(sd, list) < 0)
+		return -1;
 
 	sd->taken = 1;
 	return 0;
@@ -3644,39 +3694,6 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
 				sort_dimension_add_dynamic_header(sd, env);
 		}
 
-		if (sd->entry == &sort_parent && parent_pattern) {
-			int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
-			if (ret) {
-				char err[BUFSIZ];
-
-				regerror(ret, &parent_regex, err, sizeof(err));
-				pr_err("Invalid regex: %s\n%s", parent_pattern, err);
-				return -EINVAL;
-			}
-			list->parent = 1;
-		} else if (sd->entry == &sort_sym) {
-			list->sym = 1;
-			/*
-			 * perf diff displays the performance difference amongst
-			 * two or more perf.data files. Those files could come
-			 * from different binaries. So we should not compare
-			 * their ips, but the name of symbol.
-			 */
-			if (sort__mode == SORT_MODE__DIFF)
-				sd->entry->se_collapse = sort__sym_sort;
-
-		} else if (sd->entry == &sort_dso) {
-			list->dso = 1;
-		} else if (sd->entry == &sort_socket) {
-			list->socket = 1;
-		} else if (sd->entry == &sort_thread) {
-			list->thread = 1;
-		} else if (sd->entry == &sort_comm) {
-			list->comm = 1;
-		} else if (sd->entry == &sort_type_offset) {
-			symbol_conf.annotate_data_member = true;
-		}
-
 		return __sort_dimension__add(sd, list, level);
 	}
 
@@ -3695,9 +3712,6 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
 				    strlen(tok)))
 			return -EINVAL;
 
-		if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
-			list->sym = 1;
-
 		__sort_dimension__add(sd, list, level);
 		return 0;
 	}
@@ -3711,12 +3725,6 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
 		if (sort__mode != SORT_MODE__MEMORY)
 			return -EINVAL;
 
-		if (sd->entry == &sort_mem_dcacheline && cacheline_size() == 0)
-			return -EINVAL;
-
-		if (sd->entry == &sort_mem_daddr_sym)
-			list->sym = 1;
-
 		__sort_dimension__add(sd, list, level);
 		return 0;
 	}
-- 
2.52.0.223.gf5cc29aaa4-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/4] perf record: Split --data-mmap option
  2025-12-10  2:33 [PATCH 1/4] perf record: Split --data-mmap option Namhyung Kim
                   ` (2 preceding siblings ...)
  2025-12-10  2:33 ` [PATCH 4/4] perf report: Update sort key state from " Namhyung Kim
@ 2025-12-10 22:05 ` Ian Rogers
  3 siblings, 0 replies; 8+ messages in thread
From: Ian Rogers @ 2025-12-10 22:05 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, James Clark, Jiri Olsa, Adrian Hunter,
	Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users

On Tue, Dec 9, 2025 at 6:33 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> Currently -d/--data option controls both PERF_SAMPLE_ADDR bit and
> perf_event_attr.mmap_data flag.  Separate them using new --data-mmap
> option to support recording only one of them.
>
> For data-type profiling, data MMAP is unnecessary but it wastes a lot
> of space in the ring buffer and data file.
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>

Reviewed-by: Ian Rogers <irogers@google.com>

Thanks,
Ian

> ---
>  tools/perf/Documentation/perf-record.txt |  8 +++++++-
>  tools/perf/builtin-record.c              | 19 +++++++++++++------
>  tools/perf/util/evsel.c                  |  5 +++--
>  tools/perf/util/record.h                 |  2 ++
>  4 files changed, 25 insertions(+), 9 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
> index e8b9aadbbfa50574..c402e74172f6a22d 100644
> --- a/tools/perf/Documentation/perf-record.txt
> +++ b/tools/perf/Documentation/perf-record.txt
> @@ -344,7 +344,8 @@ OPTIONS
>
>  -d::
>  --data::
> -       Record the sample virtual addresses.  Implies --sample-mem-info.
> +       Record the sample virtual addresses.  Implies --sample-mem-info and
> +       --data-mmap.
>
>  --phys-data::
>         Record the sample physical addresses.
> @@ -861,6 +862,11 @@ filtered through the mask provided by -C option.
>         Prepare BPF filter to be used by regular users.  The action should be
>         either "pin" or "unpin".  The filter can be used after it's pinned.
>
> +--data-mmap::
> +       Enable recording MMAP events for non-executable mappings.  Basically
> +       perf only records executable mappings but data mmaping can be useful
> +       when you analyze data access with sample addresses.  So using -d option
> +       would enable this unless you specify --no-data-mmap manually.
>
>  include::intel-hybrid.txt[]
>
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 2584d0d8bc820676..cbfbd9bb10634093 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -1881,7 +1881,7 @@ static int record__synthesize_workload(struct record *rec, bool tail)
>                                                  process_synthesized_event,
>                                                  &rec->session->machines.host,
>                                                  needs_mmap,
> -                                                rec->opts.sample_address);
> +                                                rec->opts.record_data_mmap);
>         perf_thread_map__put(thread_map);
>         return err;
>  }
> @@ -2191,7 +2191,7 @@ static int record__synthesize(struct record *rec, bool tail)
>
>                 err = __machine__synthesize_threads(machine, tool, &opts->target,
>                                                     rec->evlist->core.threads,
> -                                                   f, needs_mmap, opts->sample_address,
> +                                                   f, needs_mmap, opts->record_data_mmap,
>                                                     rec->opts.nr_threads_synthesize);
>         }
>
> @@ -3006,8 +3006,9 @@ int record_opts__parse_callchain(struct record_opts *record,
>         ret = parse_callchain_record_opt(arg, callchain);
>         if (!ret) {
>                 /* Enable data address sampling for DWARF unwind. */
> -               if (callchain->record_mode == CALLCHAIN_DWARF)
> -                       record->sample_address = true;
> +               if (callchain->record_mode == CALLCHAIN_DWARF &&
> +                   !record->record_data_mmap_set)
> +                       record->record_data_mmap = true;
>                 callchain_debug(callchain);
>         }
>
> @@ -3686,6 +3687,9 @@ static struct option __record_options[] = {
>         OPT_CALLBACK(0, "off-cpu-thresh", &record.opts, "ms",
>                      "Dump off-cpu samples if off-cpu time exceeds this threshold (in milliseconds). (Default: 500ms)",
>                      record__parse_off_cpu_thresh),
> +       OPT_BOOLEAN_SET(0, "data-mmap", &record.opts.record_data_mmap,
> +                       &record.opts.record_data_mmap_set,
> +                       "Record mmap events for non-executable mappings"),
>         OPT_END()
>  };
>
> @@ -4249,9 +4253,12 @@ int cmd_record(int argc, const char **argv)
>                 goto out_opts;
>         }
>
> -       /* For backward compatibility, -d implies --mem-info */
> -       if (rec->opts.sample_address)
> +       /* For backward compatibility, -d implies --mem-info and --data-mmap */
> +       if (rec->opts.sample_address) {
>                 rec->opts.sample_data_src = true;
> +               if (!rec->opts.record_data_mmap_set)
> +                       rec->opts.record_data_mmap = true;
> +       }
>
>         /*
>          * Allow aliases to facilitate the lookup of symbols for address
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 9cd706f6279313c2..ec6552a6f667fec6 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1445,10 +1445,11 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
>                 attr->inherit_stat = 1;
>         }
>
> -       if (opts->sample_address) {
> +       if (opts->sample_address)
>                 evsel__set_sample_bit(evsel, ADDR);
> +
> +       if (opts->record_data_mmap)
>                 attr->mmap_data = track;
> -       }
>
>         /*
>          * We don't allow user space callchains for  function trace
> diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
> index ea3a6c4657eefb74..93627c9a73387ddd 100644
> --- a/tools/perf/util/record.h
> +++ b/tools/perf/util/record.h
> @@ -40,6 +40,8 @@ struct record_opts {
>         bool          record_cgroup;
>         bool          record_switch_events;
>         bool          record_switch_events_set;
> +       bool          record_data_mmap;
> +       bool          record_data_mmap_set;
>         bool          all_kernel;
>         bool          all_user;
>         bool          kernel_callchains;
> --
> 2.52.0.223.gf5cc29aaa4-goog
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/4] perf report: Enable data-type profiling with -F option too
  2025-12-10  2:33 ` [PATCH 2/4] perf report: Enable data-type profiling with -F option too Namhyung Kim
@ 2025-12-10 22:05   ` Ian Rogers
  0 siblings, 0 replies; 8+ messages in thread
From: Ian Rogers @ 2025-12-10 22:05 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, James Clark, Jiri Olsa, Adrian Hunter,
	Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users

On Tue, Dec 9, 2025 at 6:33 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> It checked -s/--sort options only.  As the sort keys can be setup using
> the -F/--fields option as well, it should enable data-type profiling
> with it too.
>
> The following two commands should have the same output.
>
>   $ perf report -s type
>
>   $ perf report -F overhead,type
>
> But there's another problem on this.  I'll handle it in the next commit.
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>

Reviewed-by: Ian Rogers <irogers@google.com>

Thanks,
Ian

> ---
>  tools/perf/builtin-report.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> index add6b1c2aaf04270..6c2b4f93ec78e579 100644
> --- a/tools/perf/builtin-report.c
> +++ b/tools/perf/builtin-report.c
> @@ -1727,7 +1727,8 @@ int cmd_report(int argc, const char **argv)
>                         sort_order = NULL;
>         }
>
> -       if (sort_order && strstr(sort_order, "type")) {
> +       if ((sort_order && strstr(sort_order, "type")) ||
> +           (field_order && strstr(field_order, "type"))) {
>                 report.data_type = true;
>                 annotate_opts.annotate_src = false;
>
> --
> 2.52.0.223.gf5cc29aaa4-goog
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 3/4] perf report: Fix histogram entry collapsing for -F option
  2025-12-10  2:33 ` [PATCH 3/4] perf report: Fix histogram entry collapsing for -F option Namhyung Kim
@ 2025-12-10 22:06   ` Ian Rogers
  0 siblings, 0 replies; 8+ messages in thread
From: Ian Rogers @ 2025-12-10 22:06 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, James Clark, Jiri Olsa, Adrian Hunter,
	Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users

On Tue, Dec 9, 2025 at 6:33 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> Users can use -F/--fields option to set output fields and sort keys
> together.  But it missed to set perf_hpp_list->need_collapse for sort
> entries that have se_collapse callbacks.  So it ends up with having
> duplicated entries separately.
>
> For example, let's run this command first.
>
>   $ perf mem record -t load -U -- perf test -w datasym
>
> This will record samples for memory access (load) to struct 'buf' and a
> loop condition ('sig_atomic_t') types.  So the following two commands
> should have identical output.
>
>   $ perf report -s type --stdio --percent-limit=1 -q
>       87.80%  perf                  buf
>       12.17%  perf                  sig_atomic_t
>
> But using -F option didn't collapse the entries based on types so the
> result looked like below:
>
>   $ perf report -F overhead,type --stdio --percent-limit=1 -q
>       23.31%  perf                  buf
>       22.84%  perf                  buf
>       21.26%  perf                  buf
>       20.39%  perf                  buf
>       12.17%  perf                  sig_atomic_t
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>

Reviewed-by: Ian Rogers <irogers@google.com>

Thanks,
Ian

> ---
>  tools/perf/util/sort.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index a0a5c1c40af0c318..c51604eaae0a4b90 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -3578,6 +3578,9 @@ static int __sort_dimension__add_output(struct perf_hpp_list *list,
>         if (__sort_dimension__add_hpp_output(sd, list, level) < 0)
>                 return -1;
>
> +       if (sd->entry->se_collapse)
> +               list->need_collapse = 1;
> +
>         sd->taken = 1;
>         return 0;
>  }
> --
> 2.52.0.223.gf5cc29aaa4-goog
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 4/4] perf report: Update sort key state from -F option
  2025-12-10  2:33 ` [PATCH 4/4] perf report: Update sort key state from " Namhyung Kim
@ 2025-12-10 22:06   ` Ian Rogers
  0 siblings, 0 replies; 8+ messages in thread
From: Ian Rogers @ 2025-12-10 22:06 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, James Clark, Jiri Olsa, Adrian Hunter,
	Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users

On Tue, Dec 9, 2025 at 6:33 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> Factor out __sort_dimension__update() so that it can be called from -s
> and -F option parsing logics.  Otherwise the following command cannot go
> into the annotation mode.
>
>   $ perf report -F overhead,type,sym
>
>   Warning: Annotation is only available for symbolic views, include "sym*" in --sort to use it.
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>

Reviewed-by: Ian Rogers <irogers@google.com>

Thanks,
Ian

> ---
>  tools/perf/util/sort.c | 100 ++++++++++++++++++++++-------------------
>  1 file changed, 54 insertions(+), 46 deletions(-)
>
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index c51604eaae0a4b90..5e27a66e3ccb0bbe 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -3531,6 +3531,56 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok,
>         return ret;
>  }
>
> +static int __sort_dimension__update(struct sort_dimension *sd,
> +                                   struct perf_hpp_list *list)
> +{
> +       if (sd->entry == &sort_parent && parent_pattern) {
> +               int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
> +               if (ret) {
> +                       char err[BUFSIZ];
> +
> +                       regerror(ret, &parent_regex, err, sizeof(err));
> +                       pr_err("Invalid regex: %s\n%s", parent_pattern, err);
> +                       return -EINVAL;
> +               }
> +               list->parent = 1;
> +       } else if (sd->entry == &sort_sym) {
> +               list->sym = 1;
> +               /*
> +                * perf diff displays the performance difference amongst
> +                * two or more perf.data files. Those files could come
> +                * from different binaries. So we should not compare
> +                * their ips, but the name of symbol.
> +                */
> +               if (sort__mode == SORT_MODE__DIFF)
> +                       sd->entry->se_collapse = sort__sym_sort;
> +
> +       } else if (sd->entry == &sort_sym_offset) {
> +               list->sym = 1;
> +       } else if (sd->entry == &sort_dso) {
> +               list->dso = 1;
> +       } else if (sd->entry == &sort_socket) {
> +               list->socket = 1;
> +       } else if (sd->entry == &sort_thread) {
> +               list->thread = 1;
> +       } else if (sd->entry == &sort_comm) {
> +               list->comm = 1;
> +       } else if (sd->entry == &sort_type_offset) {
> +               symbol_conf.annotate_data_member = true;
> +       } else if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to) {
> +               list->sym = 1;
> +       } else if (sd->entry == &sort_mem_dcacheline && cacheline_size() == 0) {
> +               return -EINVAL;
> +       } else if (sd->entry == &sort_mem_daddr_sym) {
> +               list->sym = 1;
> +       }
> +
> +       if (sd->entry->se_collapse)
> +               list->need_collapse = 1;
> +
> +       return 0;
> +}
> +
>  static int __sort_dimension__add(struct sort_dimension *sd,
>                                  struct perf_hpp_list *list,
>                                  int level)
> @@ -3541,8 +3591,8 @@ static int __sort_dimension__add(struct sort_dimension *sd,
>         if (__sort_dimension__add_hpp_sort(sd, list, level) < 0)
>                 return -1;
>
> -       if (sd->entry->se_collapse)
> -               list->need_collapse = 1;
> +       if (__sort_dimension__update(sd, list) < 0)
> +               return -1;
>
>         sd->taken = 1;
>
> @@ -3578,8 +3628,8 @@ static int __sort_dimension__add_output(struct perf_hpp_list *list,
>         if (__sort_dimension__add_hpp_output(sd, list, level) < 0)
>                 return -1;
>
> -       if (sd->entry->se_collapse)
> -               list->need_collapse = 1;
> +       if (__sort_dimension__update(sd, list) < 0)
> +               return -1;
>
>         sd->taken = 1;
>         return 0;
> @@ -3644,39 +3694,6 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
>                                 sort_dimension_add_dynamic_header(sd, env);
>                 }
>
> -               if (sd->entry == &sort_parent && parent_pattern) {
> -                       int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
> -                       if (ret) {
> -                               char err[BUFSIZ];
> -
> -                               regerror(ret, &parent_regex, err, sizeof(err));
> -                               pr_err("Invalid regex: %s\n%s", parent_pattern, err);
> -                               return -EINVAL;
> -                       }
> -                       list->parent = 1;
> -               } else if (sd->entry == &sort_sym) {
> -                       list->sym = 1;
> -                       /*
> -                        * perf diff displays the performance difference amongst
> -                        * two or more perf.data files. Those files could come
> -                        * from different binaries. So we should not compare
> -                        * their ips, but the name of symbol.
> -                        */
> -                       if (sort__mode == SORT_MODE__DIFF)
> -                               sd->entry->se_collapse = sort__sym_sort;
> -
> -               } else if (sd->entry == &sort_dso) {
> -                       list->dso = 1;
> -               } else if (sd->entry == &sort_socket) {
> -                       list->socket = 1;
> -               } else if (sd->entry == &sort_thread) {
> -                       list->thread = 1;
> -               } else if (sd->entry == &sort_comm) {
> -                       list->comm = 1;
> -               } else if (sd->entry == &sort_type_offset) {
> -                       symbol_conf.annotate_data_member = true;
> -               }
> -
>                 return __sort_dimension__add(sd, list, level);
>         }
>
> @@ -3695,9 +3712,6 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
>                                     strlen(tok)))
>                         return -EINVAL;
>
> -               if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
> -                       list->sym = 1;
> -
>                 __sort_dimension__add(sd, list, level);
>                 return 0;
>         }
> @@ -3711,12 +3725,6 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
>                 if (sort__mode != SORT_MODE__MEMORY)
>                         return -EINVAL;
>
> -               if (sd->entry == &sort_mem_dcacheline && cacheline_size() == 0)
> -                       return -EINVAL;
> -
> -               if (sd->entry == &sort_mem_daddr_sym)
> -                       list->sym = 1;
> -
>                 __sort_dimension__add(sd, list, level);
>                 return 0;
>         }
> --
> 2.52.0.223.gf5cc29aaa4-goog
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2025-12-10 22:06 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-12-10  2:33 [PATCH 1/4] perf record: Split --data-mmap option Namhyung Kim
2025-12-10  2:33 ` [PATCH 2/4] perf report: Enable data-type profiling with -F option too Namhyung Kim
2025-12-10 22:05   ` Ian Rogers
2025-12-10  2:33 ` [PATCH 3/4] perf report: Fix histogram entry collapsing for -F option Namhyung Kim
2025-12-10 22:06   ` Ian Rogers
2025-12-10  2:33 ` [PATCH 4/4] perf report: Update sort key state from " Namhyung Kim
2025-12-10 22:06   ` Ian Rogers
2025-12-10 22:05 ` [PATCH 1/4] perf record: Split --data-mmap option Ian Rogers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).