Re: [PATCH v1 5/8] perf topology: Add core_wide

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Ian Rogers <irogers@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	Mark Rutland <mark.rutland@arm.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Jiri Olsa <jolsa@kernel.org>, Namhyung Kim <namhyung@kernel.org>,
	Andi Kleen <ak@linux.intel.com>,
	Kan Liang <kan.liang@linux.intel.com>,
	Thomas Richter <tmricht@linux.ibm.com>,
	James Clark <james.clark@arm.com>,
	Miaoqian Lin <linmq006@gmail.com>,
	John Garry <john.garry@huawei.com>,
	Zhengjun Xing <zhengjun.xing@linux.intel.com>,
	Florian Fischer <florian.fischer@muhq.space>,
	linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org,
	perry.taylor@intel.com, caleb.biggers@intel.com,
	kshipra.bopardikar@intel.com,
	Stephane Eranian <eranian@google.com>
Subject: Re: [PATCH v1 5/8] perf topology: Add core_wide
Date: Wed, 31 Aug 2022 11:40:22 -0300	[thread overview]
Message-ID: <Yw9y1mEW7BhNvStT@kernel.org> (raw)
In-Reply-To: <20220830164846.401143-6-irogers@google.com>

Em Tue, Aug 30, 2022 at 09:48:43AM -0700, Ian Rogers escreveu:
> It is possible to optimize metrics when all SMT threads (CPUs) on a
> core are measuring events in system wide mode. For example, TMA
> metrics defines CORE_CLKS for Sandybrdige as:
> 
> if SMT is disabled:
>   CPU_CLK_UNHALTED.THREAD
> if SMT is enabled and recording on all SMT threads:
>   CPU_CLK_UNHALTED.THREAD_ANY / 2
> if SMT is enabled and not recording on all SMT threads:
>   (CPU_CLK_UNHALTED.THREAD/2)*
>   (1+CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE/CPU_CLK_UNHALTED.REF_XCLK )
> 
> That is two more events are necessary when not gathering counts on all
> SMT threads. To distinguish all SMT threads on a core vs system wide
> (all CPUs) call the new property core wide.  Add a core wide test that
> determines the property from user requested CPUs, the topology and
> system wide. System wide is required as other processes running on a
> SMT thread will change the counts.
> 
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/util/cputopo.c | 46 +++++++++++++++++++++++++++++++++++++++
>  tools/perf/util/cputopo.h |  3 +++
>  tools/perf/util/smt.c     | 14 ++++++++++++
>  tools/perf/util/smt.h     |  7 ++++++
>  4 files changed, 70 insertions(+)
> 
> diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
> index 511002e52714..1a3ff6449158 100644
> --- a/tools/perf/util/cputopo.c
> +++ b/tools/perf/util/cputopo.c
> @@ -172,6 +172,52 @@ bool cpu_topology__smt_on(const struct cpu_topology *topology)
>  	return false;
>  }
>  
> +bool cpu_topology__core_wide(const struct cpu_topology *topology,
> +			     const char *user_requested_cpu_list)
> +{
> +	struct perf_cpu_map *user_requested_cpus;
> +
> +	/*
> +	 * If user_requested_cpu_list is empty then all CPUs are recorded and so
> +	 * core_wide is true.
> +	 */
> +	if (!user_requested_cpu_list)
> +		return true;
> +
> +	user_requested_cpus = perf_cpu_map__new(user_requested_cpu_list);

Don't we need a NULL test here?

> +	/* Check that every user requested CPU is the complete set of SMT threads on a core. */
> +	for (u32 i = 0; i < topology->core_cpus_lists; i++) {
> +		const char *core_cpu_list = topology->core_cpus_list[i];
> +		struct perf_cpu_map *core_cpus = perf_cpu_map__new(core_cpu_list);

Here too, no?

> +		struct perf_cpu cpu;
> +		int idx;
> +		bool has_first, first = true;
> +
> +		perf_cpu_map__for_each_cpu(cpu, idx, core_cpus) {
> +			if (first) {
> +				has_first = perf_cpu_map__has(user_requested_cpus, cpu);
> +				first = false;
> +			} else {
> +				/*
> +				 * If the first core CPU is user requested then
> +				 * all subsequent CPUs in the core must be user
> +				 * requested too. If the first CPU isn't user
> +				 * requested then none of the others must be
> +				 * too.
> +				 */
> +				if (perf_cpu_map__has(user_requested_cpus, cpu) != has_first) {
> +					perf_cpu_map__put(core_cpus);
> +					perf_cpu_map__put(user_requested_cpus);
> +					return false;
> +				}
> +			}
> +		}
> +		perf_cpu_map__put(core_cpus);
> +	}
> +	perf_cpu_map__put(user_requested_cpus);
> +	return true;
> +}
> +
>  static bool has_die_topology(void)
>  {
>  	char filename[MAXPATHLEN];
> diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h
> index 469db775a13c..969e5920a00e 100644
> --- a/tools/perf/util/cputopo.h
> +++ b/tools/perf/util/cputopo.h
> @@ -60,6 +60,9 @@ struct cpu_topology *cpu_topology__new(void);
>  void cpu_topology__delete(struct cpu_topology *tp);
>  /* Determine from the core list whether SMT was enabled. */
>  bool cpu_topology__smt_on(const struct cpu_topology *topology);
> +/* Are the sets of SMT siblings all enabled or all disabled in user_requested_cpus. */
> +bool cpu_topology__core_wide(const struct cpu_topology *topology,
> +			     const char *user_requested_cpu_list);
>  
>  struct numa_topology *numa_topology__new(void);
>  void numa_topology__delete(struct numa_topology *tp);
> diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c
> index ce90c4ee4138..994e9e418227 100644
> --- a/tools/perf/util/smt.c
> +++ b/tools/perf/util/smt.c
> @@ -21,3 +21,17 @@ bool smt_on(const struct cpu_topology *topology)
>  	cached = true;
>  	return cached_result;
>  }
> +
> +bool core_wide(bool system_wide, const char *user_requested_cpu_list,
> +	       const struct cpu_topology *topology)
> +{
> +	/* If not everything running on a core is being recorded then we can't use core_wide. */
> +	if (!system_wide)
> +		return false;
> +
> +	/* Cheap case that SMT is disabled and therefore we're inherently core_wide. */
> +	if (!smt_on(topology))
> +		return true;
> +
> +	return cpu_topology__core_wide(topology, user_requested_cpu_list);
> +}
> diff --git a/tools/perf/util/smt.h b/tools/perf/util/smt.h
> index e26999c6b8d4..ae9095f2c38c 100644
> --- a/tools/perf/util/smt.h
> +++ b/tools/perf/util/smt.h
> @@ -7,4 +7,11 @@ struct cpu_topology;
>  /* Returns true if SMT (aka hyperthreading) is enabled. */
>  bool smt_on(const struct cpu_topology *topology);
>  
> +/*
> + * Returns true when system wide and all SMT threads for a core are in the
> + * user_requested_cpus map.
> + */
> +bool core_wide(bool system_wide, const char *user_requested_cpu_list,
> +	       const struct cpu_topology *topology);
> +
>  #endif /* __SMT_H */
> -- 
> 2.37.2.672.g94769d06f0-goog

-- 

- Arnaldo

next prev parent reply	other threads:[~2022-08-31 14:40 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-30 16:48 [PATCH v1 0/8] Add core wide metric literal Ian Rogers
2022-08-30 16:48 ` [PATCH v1 1/8] perf smt: Tidy header guard add SPDX Ian Rogers
2022-08-31 14:46   ` Arnaldo Carvalho de Melo
2022-08-30 16:48 ` [PATCH v1 2/8] perf metric: Return early if no CPU PMU table exists Ian Rogers
2022-08-31 12:21   ` Arnaldo Carvalho de Melo
2022-08-31 12:30   ` Arnaldo Carvalho de Melo
2022-08-30 16:48 ` [PATCH v1 3/8] perf expr: Move the scanner_ctx into the parse_ctx Ian Rogers
2022-08-30 16:48 ` [PATCH v1 4/8] perf smt: Compute SMT from topology Ian Rogers
2022-08-30 16:48 ` [PATCH v1 5/8] perf topology: Add core_wide Ian Rogers
2022-08-31 14:40   ` Arnaldo Carvalho de Melo [this message]
2022-08-31 15:58     ` Ian Rogers
2022-08-31 16:20       ` Arnaldo Carvalho de Melo
2022-08-31 16:42         ` Ian Rogers
2022-08-30 16:48 ` [PATCH v1 6/8] perf stat: Delay metric parsing Ian Rogers
2022-08-31 14:42   ` Arnaldo Carvalho de Melo
2022-08-31 16:13     ` Ian Rogers
2022-08-30 16:48 ` [PATCH v1 7/8] perf metrics: Wire up core_wide Ian Rogers
2022-08-31 14:44   ` Arnaldo Carvalho de Melo
2022-08-31 16:38     ` Ian Rogers
2022-08-30 16:48 ` [PATCH v1 8/8] perf test: Add basic core_wide expression test Ian Rogers

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Yw9y1mEW7BhNvStT@kernel.org \
    --to=acme@kernel.org \
    --cc=ak@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=caleb.biggers@intel.com \
    --cc=eranian@google.com \
    --cc=florian.fischer@muhq.space \
    --cc=irogers@google.com \
    --cc=james.clark@arm.com \
    --cc=john.garry@huawei.com \
    --cc=jolsa@kernel.org \
    --cc=kan.liang@linux.intel.com \
    --cc=kshipra.bopardikar@intel.com \
    --cc=linmq006@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=perry.taylor@intel.com \
    --cc=peterz@infradead.org \
    --cc=tmricht@linux.ibm.com \
    --cc=zhengjun.xing@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.