From: Namhyung Kim <namhyung@kernel.org>
To: Stephen Brennan <stephen.s.brennan@oracle.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>,
Ingo Molnar <mingo@redhat.com>,
Peter Zijlstra <peterz@infradead.org>,
Mark Rutland <mark.rutland@arm.com>,
Adrian Hunter <adrian.hunter@intel.com>,
Jiri Olsa <jolsa@kernel.org>,
linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org,
Wangyang Guo <wangyang.guo@intel.com>,
Dapeng Mi <dapeng1.mi@linux.intel.com>,
Tianyou Li <tianyou.li@intel.com>,
Alexander Shishkin <alexander.shishkin@linux.intel.com>,
James Clark <james.clark@linaro.org>,
Ian Rogers <irogers@google.com>
Subject: Re: [PATCH] tools: perf: add comm_ignore_digit column
Date: Mon, 16 Mar 2026 10:48:00 -0700 [thread overview]
Message-ID: <abhCUFPsyWGJ6DNn@google.com> (raw)
In-Reply-To: <20260305181847.3249498-1-stephen.s.brennan@oracle.com>
Hello,
On Thu, Mar 05, 2026 at 10:18:47AM -0800, Stephen Brennan wrote:
> The "comm" column allows grouping events by the process command. It is
> intended to group like programs, despite having different PIDs. But some
> workloads may adjust their own command, so that a unique identifier
> (e.g. a PID or some other numeric value) is part of the command name.
> This destroys the utility of "comm", forcing perf to place each unique
> process name into its own bucket, which can contribute to a
> combinatorial explosion of memory use in perf report.
>
> Create a less strict version of this column, which ignores digits when
> comparing command names. This allows "similar looking" processes to
> again be placed in the same bucket.
Can you please rebase this onto the current perf-tools-next?
Thanks,
Namhyung
>
> Signed-off-by: Stephen Brennan <stephen.s.brennan@oracle.com>
> ---
> tools/perf/util/hist.c | 1 +
> tools/perf/util/hist.h | 1 +
> tools/perf/util/sort.c | 92 +++++++++++++++++++++++++++++++++++++++++-
> tools/perf/util/sort.h | 1 +
> 4 files changed, 94 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> index ef4b569f7df46..6759826be8344 100644
> --- a/tools/perf/util/hist.c
> +++ b/tools/perf/util/hist.c
> @@ -110,6 +110,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
> len = thread__comm_len(h->thread);
> if (hists__new_col_len(hists, HISTC_COMM, len))
> hists__set_col_len(hists, HISTC_THREAD, len + 8);
> + hists__new_col_len(hists, HISTC_COMM_IGNORE_DIGIT, len);
>
> if (h->ms.map) {
> len = dso__name_len(map__dso(h->ms.map));
> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> index 1d5ea632ca4e1..ae7e98bd9e46d 100644
> --- a/tools/perf/util/hist.h
> +++ b/tools/perf/util/hist.h
> @@ -44,6 +44,7 @@ enum hist_column {
> HISTC_THREAD,
> HISTC_TGID,
> HISTC_COMM,
> + HISTC_COMM_IGNORE_DIGIT,
> HISTC_CGROUP_ID,
> HISTC_CGROUP,
> HISTC_PARENT,
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index f3a565b0e2307..e6012b2457c5d 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -1,4 +1,5 @@
> // SPDX-License-Identifier: GPL-2.0
> +#include <ctype.h>
> #include <errno.h>
> #include <inttypes.h>
> #include <regex.h>
> @@ -265,6 +266,93 @@ struct sort_entry sort_comm = {
> .se_width_idx = HISTC_COMM,
> };
>
> +/* --sort comm_ignore_digit */
> +
> +static int64_t strcmp_nodigit(const char *left, const char *right)
> +{
> + for (;;) {
> + while (*left && isdigit(*left))
> + left++;
> + while (*right && isdigit(*right))
> + right++;
> + if (*left == *right && !*left) {
> + return 0;
> + } else if (*left == *right) {
> + left++;
> + right++;
> + } else {
> + return (int64_t)*left - (int64_t)*right;
> + }
> + }
> +}
> +
> +static int64_t
> +sort__comm_ignore_digit_cmp(struct hist_entry *left, struct hist_entry *right)
> +{
> + return strcmp_nodigit(comm__str(right->comm), comm__str(left->comm));
> +}
> +
> +static int64_t
> +sort__comm_ignore_digit_collapse(struct hist_entry *left, struct hist_entry *right)
> +{
> + return strcmp_nodigit(comm__str(right->comm), comm__str(left->comm));
> +}
> +
> +static int64_t
> +sort__comm_ignore_digit_sort(struct hist_entry *left, struct hist_entry *right)
> +{
> + return strcmp_nodigit(comm__str(right->comm), comm__str(left->comm));
> +}
> +
> +static int hist_entry__comm_ignore_digit_snprintf(struct hist_entry *he, char *bf,
> + size_t size, unsigned int width)
> +{
> + int ret = 0;
> + unsigned int print_len, printed = 0, start = 0, end = 0;
> + bool in_digit;
> + const char *comm = comm__str(he->comm), *print;
> +
> + while (printed < width && printed < size && comm[start]) {
> + in_digit = !!isdigit(comm[start]);
> + end = start + 1;
> + while (comm[end] && !!isdigit(comm[end]) == in_digit)
> + end++;
> + if (in_digit) {
> + print_len = 3; /* <N> */
> + print = "<N>";
> + } else {
> + print_len = end - start;
> + print = &comm[start];
> + }
> + print_len = min(print_len, width - printed);
> + ret = repsep_snprintf(bf + printed, size - printed, "%-.*s",
> + print_len, print);
> + if (ret < 0)
> + return ret;
> + start = end;
> + printed += ret;
> + }
> + /* Pad to width if necessary */
> + if (printed < width && printed < size) {
> + ret = repsep_snprintf(bf + printed, size - printed, "%-*.*s",
> + width - printed, width - printed, "");
> + if (ret < 0)
> + return ret;
> + printed += ret;
> + }
> + return printed;
> +}
> +
> +struct sort_entry sort_comm_ignore_digit = {
> + .se_header = "CommandIgnoreDigit",
> + .se_cmp = sort__comm_ignore_digit_cmp,
> + .se_collapse = sort__comm_ignore_digit_collapse,
> + .se_sort = sort__comm_ignore_digit_sort,
> + .se_snprintf = hist_entry__comm_ignore_digit_snprintf,
> + .se_filter = hist_entry__thread_filter,
> + .se_width_idx = HISTC_COMM_IGNORE_DIGIT,
> +};
> +
> /* --sort dso */
>
> static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
> @@ -2576,6 +2664,7 @@ static struct sort_dimension common_sort_dimensions[] = {
> DIM(SORT_PID, "pid", sort_thread),
> DIM(SORT_TGID, "tgid", sort_tgid),
> DIM(SORT_COMM, "comm", sort_comm),
> + DIM(SORT_COMM_IGNORE_DIGIT, "comm_ignore_digit", sort_comm_ignore_digit),
> DIM(SORT_DSO, "dso", sort_dso),
> DIM(SORT_SYM, "symbol", sort_sym),
> DIM(SORT_PARENT, "parent", sort_parent),
> @@ -3675,7 +3764,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
> list->socket = 1;
> } else if (sd->entry == &sort_thread) {
> list->thread = 1;
> - } else if (sd->entry == &sort_comm) {
> + } else if (sd->entry == &sort_comm || sd->entry == &sort_comm_ignore_digit) {
> list->comm = 1;
> } else if (sd->entry == &sort_type_offset) {
> symbol_conf.annotate_data_member = true;
> @@ -4022,6 +4111,7 @@ static bool get_elide(int idx, FILE *output)
> case HISTC_DSO:
> return __get_elide(symbol_conf.dso_list, "dso", output);
> case HISTC_COMM:
> + case HISTC_COMM_IGNORE_DIGIT:
> return __get_elide(symbol_conf.comm_list, "comm", output);
> default:
> break;
> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
> index d7787958e06b9..6819934b4d48a 100644
> --- a/tools/perf/util/sort.h
> +++ b/tools/perf/util/sort.h
> @@ -43,6 +43,7 @@ enum sort_type {
> /* common sort keys */
> SORT_PID,
> SORT_COMM,
> + SORT_COMM_IGNORE_DIGIT,
> SORT_DSO,
> SORT_SYM,
> SORT_PARENT,
> --
> 2.47.3
>
next prev parent reply other threads:[~2026-03-16 17:48 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-05 18:18 [PATCH] tools: perf: add comm_ignore_digit column Stephen Brennan
2026-03-16 17:48 ` Namhyung Kim [this message]
2026-03-16 17:59 ` Stephen Brennan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=abhCUFPsyWGJ6DNn@google.com \
--to=namhyung@kernel.org \
--cc=acme@kernel.org \
--cc=adrian.hunter@intel.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=dapeng1.mi@linux.intel.com \
--cc=irogers@google.com \
--cc=james.clark@linaro.org \
--cc=jolsa@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=mingo@redhat.com \
--cc=peterz@infradead.org \
--cc=stephen.s.brennan@oracle.com \
--cc=tianyou.li@intel.com \
--cc=wangyang.guo@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox