public inbox for linux-perf-users@vger.kernel.org
 help / color / mirror / Atom feed
From: Namhyung Kim <namhyung@kernel.org>
To: Stephen Brennan <stephen.s.brennan@oracle.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>,
	Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Mark Rutland <mark.rutland@arm.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	Jiri Olsa <jolsa@kernel.org>,
	linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org,
	Wangyang Guo <wangyang.guo@intel.com>,
	Dapeng Mi <dapeng1.mi@linux.intel.com>,
	Tianyou Li <tianyou.li@intel.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	James Clark <james.clark@linaro.org>,
	Ian Rogers <irogers@google.com>
Subject: Re: [PATCH] tools: perf: add comm_ignore_digit column
Date: Mon, 16 Mar 2026 10:48:00 -0700	[thread overview]
Message-ID: <abhCUFPsyWGJ6DNn@google.com> (raw)
In-Reply-To: <20260305181847.3249498-1-stephen.s.brennan@oracle.com>

Hello,

On Thu, Mar 05, 2026 at 10:18:47AM -0800, Stephen Brennan wrote:
> The "comm" column allows grouping events by the process command. It is
> intended to group like programs, despite having different PIDs. But some
> workloads may adjust their own command, so that a unique identifier
> (e.g. a PID or some other numeric value) is part of the command name.
> This destroys the utility of "comm", forcing perf to place each unique
> process name into its own bucket, which can contribute to a
> combinatorial explosion of memory use in perf report.
> 
> Create a less strict version of this column, which ignores digits when
> comparing command names. This allows "similar looking" processes to
> again be placed in the same bucket.

Can you please rebase this onto the current perf-tools-next?

Thanks,
Namhyung

> 
> Signed-off-by: Stephen Brennan <stephen.s.brennan@oracle.com>
> ---
>  tools/perf/util/hist.c |  1 +
>  tools/perf/util/hist.h |  1 +
>  tools/perf/util/sort.c | 92 +++++++++++++++++++++++++++++++++++++++++-
>  tools/perf/util/sort.h |  1 +
>  4 files changed, 94 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> index ef4b569f7df46..6759826be8344 100644
> --- a/tools/perf/util/hist.c
> +++ b/tools/perf/util/hist.c
> @@ -110,6 +110,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
>  	len = thread__comm_len(h->thread);
>  	if (hists__new_col_len(hists, HISTC_COMM, len))
>  		hists__set_col_len(hists, HISTC_THREAD, len + 8);
> +	hists__new_col_len(hists, HISTC_COMM_IGNORE_DIGIT, len);
>  
>  	if (h->ms.map) {
>  		len = dso__name_len(map__dso(h->ms.map));
> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> index 1d5ea632ca4e1..ae7e98bd9e46d 100644
> --- a/tools/perf/util/hist.h
> +++ b/tools/perf/util/hist.h
> @@ -44,6 +44,7 @@ enum hist_column {
>  	HISTC_THREAD,
>  	HISTC_TGID,
>  	HISTC_COMM,
> +	HISTC_COMM_IGNORE_DIGIT,
>  	HISTC_CGROUP_ID,
>  	HISTC_CGROUP,
>  	HISTC_PARENT,
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index f3a565b0e2307..e6012b2457c5d 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -1,4 +1,5 @@
>  // SPDX-License-Identifier: GPL-2.0
> +#include <ctype.h>
>  #include <errno.h>
>  #include <inttypes.h>
>  #include <regex.h>
> @@ -265,6 +266,93 @@ struct sort_entry sort_comm = {
>  	.se_width_idx	= HISTC_COMM,
>  };
>  
> +/* --sort comm_ignore_digit */
> +
> +static int64_t strcmp_nodigit(const char *left, const char *right)
> +{
> +	for (;;) {
> +		while (*left && isdigit(*left))
> +			left++;
> +		while (*right && isdigit(*right))
> +			right++;
> +		if (*left == *right && !*left) {
> +			return 0;
> +		} else if (*left == *right) {
> +			left++;
> +			right++;
> +		} else {
> +			return (int64_t)*left - (int64_t)*right;
> +		}
> +	}
> +}
> +
> +static int64_t
> +sort__comm_ignore_digit_cmp(struct hist_entry *left, struct hist_entry *right)
> +{
> +	return strcmp_nodigit(comm__str(right->comm), comm__str(left->comm));
> +}
> +
> +static int64_t
> +sort__comm_ignore_digit_collapse(struct hist_entry *left, struct hist_entry *right)
> +{
> +	return strcmp_nodigit(comm__str(right->comm), comm__str(left->comm));
> +}
> +
> +static int64_t
> +sort__comm_ignore_digit_sort(struct hist_entry *left, struct hist_entry *right)
> +{
> +	return strcmp_nodigit(comm__str(right->comm), comm__str(left->comm));
> +}
> +
> +static int hist_entry__comm_ignore_digit_snprintf(struct hist_entry *he, char *bf,
> +						size_t size, unsigned int width)
> +{
> +	int ret = 0;
> +	unsigned int print_len, printed = 0, start = 0, end = 0;
> +	bool in_digit;
> +	const char *comm = comm__str(he->comm), *print;
> +
> +	while (printed < width && printed < size && comm[start]) {
> +		in_digit = !!isdigit(comm[start]);
> +		end = start + 1;
> +		while (comm[end] && !!isdigit(comm[end]) == in_digit)
> +			end++;
> +		if (in_digit) {
> +			print_len = 3; /* <N> */
> +			print = "<N>";
> +		} else {
> +			print_len = end - start;
> +			print = &comm[start];
> +		}
> +		print_len = min(print_len, width - printed);
> +		ret = repsep_snprintf(bf + printed, size - printed, "%-.*s",
> +					print_len, print);
> +		if (ret < 0)
> +			return ret;
> +		start = end;
> +		printed += ret;
> +	}
> +	/* Pad to width if necessary */
> +	if (printed < width && printed < size) {
> +		ret = repsep_snprintf(bf + printed, size - printed, "%-*.*s",
> +				       width - printed, width - printed, "");
> +		if (ret < 0)
> +			return ret;
> +		printed += ret;
> +	}
> +	return printed;
> +}
> +
> +struct sort_entry sort_comm_ignore_digit = {
> +	.se_header	= "CommandIgnoreDigit",
> +	.se_cmp		= sort__comm_ignore_digit_cmp,
> +	.se_collapse	= sort__comm_ignore_digit_collapse,
> +	.se_sort	= sort__comm_ignore_digit_sort,
> +	.se_snprintf	= hist_entry__comm_ignore_digit_snprintf,
> +	.se_filter	= hist_entry__thread_filter,
> +	.se_width_idx	= HISTC_COMM_IGNORE_DIGIT,
> +};
> +
>  /* --sort dso */
>  
>  static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
> @@ -2576,6 +2664,7 @@ static struct sort_dimension common_sort_dimensions[] = {
>  	DIM(SORT_PID, "pid", sort_thread),
>  	DIM(SORT_TGID, "tgid", sort_tgid),
>  	DIM(SORT_COMM, "comm", sort_comm),
> +	DIM(SORT_COMM_IGNORE_DIGIT, "comm_ignore_digit", sort_comm_ignore_digit),
>  	DIM(SORT_DSO, "dso", sort_dso),
>  	DIM(SORT_SYM, "symbol", sort_sym),
>  	DIM(SORT_PARENT, "parent", sort_parent),
> @@ -3675,7 +3764,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
>  			list->socket = 1;
>  		} else if (sd->entry == &sort_thread) {
>  			list->thread = 1;
> -		} else if (sd->entry == &sort_comm) {
> +		} else if (sd->entry == &sort_comm || sd->entry == &sort_comm_ignore_digit) {
>  			list->comm = 1;
>  		} else if (sd->entry == &sort_type_offset) {
>  			symbol_conf.annotate_data_member = true;
> @@ -4022,6 +4111,7 @@ static bool get_elide(int idx, FILE *output)
>  	case HISTC_DSO:
>  		return __get_elide(symbol_conf.dso_list, "dso", output);
>  	case HISTC_COMM:
> +	case HISTC_COMM_IGNORE_DIGIT:
>  		return __get_elide(symbol_conf.comm_list, "comm", output);
>  	default:
>  		break;
> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
> index d7787958e06b9..6819934b4d48a 100644
> --- a/tools/perf/util/sort.h
> +++ b/tools/perf/util/sort.h
> @@ -43,6 +43,7 @@ enum sort_type {
>  	/* common sort keys */
>  	SORT_PID,
>  	SORT_COMM,
> +	SORT_COMM_IGNORE_DIGIT,
>  	SORT_DSO,
>  	SORT_SYM,
>  	SORT_PARENT,
> -- 
> 2.47.3
> 

  reply	other threads:[~2026-03-16 17:48 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-05 18:18 [PATCH] tools: perf: add comm_ignore_digit column Stephen Brennan
2026-03-16 17:48 ` Namhyung Kim [this message]
2026-03-16 17:59   ` Stephen Brennan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=abhCUFPsyWGJ6DNn@google.com \
    --to=namhyung@kernel.org \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=dapeng1.mi@linux.intel.com \
    --cc=irogers@google.com \
    --cc=james.clark@linaro.org \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=stephen.s.brennan@oracle.com \
    --cc=tianyou.li@intel.com \
    --cc=wangyang.guo@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.