All of lore.kernel.org
 help / color / mirror / Atom feed
From: Namhyung Kim <namhyung@kernel.org>
To: Stephen Brennan <stephen.s.brennan@oracle.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>,
	Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Mark Rutland <mark.rutland@arm.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	Jiri Olsa <jolsa@kernel.org>,
	linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org,
	Wangyang Guo <wangyang.guo@intel.com>,
	Dapeng Mi <dapeng1.mi@linux.intel.com>,
	Tianyou Li <tianyou.li@intel.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	James Clark <james.clark@linaro.org>,
	Ian Rogers <irogers@google.com>
Subject: Re: [PATCH] tools: perf: add comm_ignore_digit column
Date: Mon, 16 Mar 2026 10:48:00 -0700	[thread overview]
Message-ID: <abhCUFPsyWGJ6DNn@google.com> (raw)
In-Reply-To: <20260305181847.3249498-1-stephen.s.brennan@oracle.com>

Hello,

On Thu, Mar 05, 2026 at 10:18:47AM -0800, Stephen Brennan wrote:
> The "comm" column allows grouping events by the process command. It is
> intended to group like programs, despite having different PIDs. But some
> workloads may adjust their own command, so that a unique identifier
> (e.g. a PID or some other numeric value) is part of the command name.
> This destroys the utility of "comm", forcing perf to place each unique
> process name into its own bucket, which can contribute to a
> combinatorial explosion of memory use in perf report.
> 
> Create a less strict version of this column, which ignores digits when
> comparing command names. This allows "similar looking" processes to
> again be placed in the same bucket.

Can you please rebase this onto the current perf-tools-next?

Thanks,
Namhyung

> 
> Signed-off-by: Stephen Brennan <stephen.s.brennan@oracle.com>
> ---
>  tools/perf/util/hist.c |  1 +
>  tools/perf/util/hist.h |  1 +
>  tools/perf/util/sort.c | 92 +++++++++++++++++++++++++++++++++++++++++-
>  tools/perf/util/sort.h |  1 +
>  4 files changed, 94 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> index ef4b569f7df46..6759826be8344 100644
> --- a/tools/perf/util/hist.c
> +++ b/tools/perf/util/hist.c
> @@ -110,6 +110,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
>  	len = thread__comm_len(h->thread);
>  	if (hists__new_col_len(hists, HISTC_COMM, len))
>  		hists__set_col_len(hists, HISTC_THREAD, len + 8);
> +	hists__new_col_len(hists, HISTC_COMM_IGNORE_DIGIT, len);
>  
>  	if (h->ms.map) {
>  		len = dso__name_len(map__dso(h->ms.map));
> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> index 1d5ea632ca4e1..ae7e98bd9e46d 100644
> --- a/tools/perf/util/hist.h
> +++ b/tools/perf/util/hist.h
> @@ -44,6 +44,7 @@ enum hist_column {
>  	HISTC_THREAD,
>  	HISTC_TGID,
>  	HISTC_COMM,
> +	HISTC_COMM_IGNORE_DIGIT,
>  	HISTC_CGROUP_ID,
>  	HISTC_CGROUP,
>  	HISTC_PARENT,
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index f3a565b0e2307..e6012b2457c5d 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -1,4 +1,5 @@
>  // SPDX-License-Identifier: GPL-2.0
> +#include <ctype.h>
>  #include <errno.h>
>  #include <inttypes.h>
>  #include <regex.h>
> @@ -265,6 +266,93 @@ struct sort_entry sort_comm = {
>  	.se_width_idx	= HISTC_COMM,
>  };
>  
> +/* --sort comm_ignore_digit */
> +
> +static int64_t strcmp_nodigit(const char *left, const char *right)
> +{
> +	for (;;) {
> +		while (*left && isdigit(*left))
> +			left++;
> +		while (*right && isdigit(*right))
> +			right++;
> +		if (*left == *right && !*left) {
> +			return 0;
> +		} else if (*left == *right) {
> +			left++;
> +			right++;
> +		} else {
> +			return (int64_t)*left - (int64_t)*right;
> +		}
> +	}
> +}
> +
> +static int64_t
> +sort__comm_ignore_digit_cmp(struct hist_entry *left, struct hist_entry *right)
> +{
> +	return strcmp_nodigit(comm__str(right->comm), comm__str(left->comm));
> +}
> +
> +static int64_t
> +sort__comm_ignore_digit_collapse(struct hist_entry *left, struct hist_entry *right)
> +{
> +	return strcmp_nodigit(comm__str(right->comm), comm__str(left->comm));
> +}
> +
> +static int64_t
> +sort__comm_ignore_digit_sort(struct hist_entry *left, struct hist_entry *right)
> +{
> +	return strcmp_nodigit(comm__str(right->comm), comm__str(left->comm));
> +}
> +
> +static int hist_entry__comm_ignore_digit_snprintf(struct hist_entry *he, char *bf,
> +						size_t size, unsigned int width)
> +{
> +	int ret = 0;
> +	unsigned int print_len, printed = 0, start = 0, end = 0;
> +	bool in_digit;
> +	const char *comm = comm__str(he->comm), *print;
> +
> +	while (printed < width && printed < size && comm[start]) {
> +		in_digit = !!isdigit(comm[start]);
> +		end = start + 1;
> +		while (comm[end] && !!isdigit(comm[end]) == in_digit)
> +			end++;
> +		if (in_digit) {
> +			print_len = 3; /* <N> */
> +			print = "<N>";
> +		} else {
> +			print_len = end - start;
> +			print = &comm[start];
> +		}
> +		print_len = min(print_len, width - printed);
> +		ret = repsep_snprintf(bf + printed, size - printed, "%-.*s",
> +					print_len, print);
> +		if (ret < 0)
> +			return ret;
> +		start = end;
> +		printed += ret;
> +	}
> +	/* Pad to width if necessary */
> +	if (printed < width && printed < size) {
> +		ret = repsep_snprintf(bf + printed, size - printed, "%-*.*s",
> +				       width - printed, width - printed, "");
> +		if (ret < 0)
> +			return ret;
> +		printed += ret;
> +	}
> +	return printed;
> +}
> +
> +struct sort_entry sort_comm_ignore_digit = {
> +	.se_header	= "CommandIgnoreDigit",
> +	.se_cmp		= sort__comm_ignore_digit_cmp,
> +	.se_collapse	= sort__comm_ignore_digit_collapse,
> +	.se_sort	= sort__comm_ignore_digit_sort,
> +	.se_snprintf	= hist_entry__comm_ignore_digit_snprintf,
> +	.se_filter	= hist_entry__thread_filter,
> +	.se_width_idx	= HISTC_COMM_IGNORE_DIGIT,
> +};
> +
>  /* --sort dso */
>  
>  static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
> @@ -2576,6 +2664,7 @@ static struct sort_dimension common_sort_dimensions[] = {
>  	DIM(SORT_PID, "pid", sort_thread),
>  	DIM(SORT_TGID, "tgid", sort_tgid),
>  	DIM(SORT_COMM, "comm", sort_comm),
> +	DIM(SORT_COMM_IGNORE_DIGIT, "comm_ignore_digit", sort_comm_ignore_digit),
>  	DIM(SORT_DSO, "dso", sort_dso),
>  	DIM(SORT_SYM, "symbol", sort_sym),
>  	DIM(SORT_PARENT, "parent", sort_parent),
> @@ -3675,7 +3764,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
>  			list->socket = 1;
>  		} else if (sd->entry == &sort_thread) {
>  			list->thread = 1;
> -		} else if (sd->entry == &sort_comm) {
> +		} else if (sd->entry == &sort_comm || sd->entry == &sort_comm_ignore_digit) {
>  			list->comm = 1;
>  		} else if (sd->entry == &sort_type_offset) {
>  			symbol_conf.annotate_data_member = true;
> @@ -4022,6 +4111,7 @@ static bool get_elide(int idx, FILE *output)
>  	case HISTC_DSO:
>  		return __get_elide(symbol_conf.dso_list, "dso", output);
>  	case HISTC_COMM:
> +	case HISTC_COMM_IGNORE_DIGIT:
>  		return __get_elide(symbol_conf.comm_list, "comm", output);
>  	default:
>  		break;
> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
> index d7787958e06b9..6819934b4d48a 100644
> --- a/tools/perf/util/sort.h
> +++ b/tools/perf/util/sort.h
> @@ -43,6 +43,7 @@ enum sort_type {
>  	/* common sort keys */
>  	SORT_PID,
>  	SORT_COMM,
> +	SORT_COMM_IGNORE_DIGIT,
>  	SORT_DSO,
>  	SORT_SYM,
>  	SORT_PARENT,
> -- 
> 2.47.3
> 

  reply	other threads:[~2026-03-16 17:48 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-05 18:18 [PATCH] tools: perf: add comm_ignore_digit column Stephen Brennan
2026-03-16 17:48 ` Namhyung Kim [this message]
2026-03-16 17:59   ` Stephen Brennan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=abhCUFPsyWGJ6DNn@google.com \
    --to=namhyung@kernel.org \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=dapeng1.mi@linux.intel.com \
    --cc=irogers@google.com \
    --cc=james.clark@linaro.org \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=stephen.s.brennan@oracle.com \
    --cc=tianyou.li@intel.com \
    --cc=wangyang.guo@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.