Igt-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
To: <igt-dev@lists.freedesktop.org>,
	Lionel G Landwerlin <lionel.g.landwerlin@intel.com>,
	Ashutosh Dixit <ashutosh.dixit@intel.com>
Subject: Re: [igt-dev] [PATCH i-g-t v6 28/36] lib/i915/perf-config: extend the device info
Date: Mon, 10 Oct 2022 15:57:59 -0700	[thread overview]
Message-ID: <Y0Sjd9GjZJVZIML9@unerlige-ril> (raw)
In-Reply-To: <20221010214215.5378-29-umesh.nerlige.ramappa@intel.com>

On Mon, Oct 10, 2022 at 09:42:07PM +0000, Umesh Nerlige Ramappa wrote:
>From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>

>
>This will allow equations to check for finer information on the
>topology. Also add EuDualSubslicesSlice0123Count.
>
>Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>---
> lib/i915/perf-configs/codegen.py |  1 +
> lib/i915/perf.c                  | 31 ++++++++++++
> lib/i915/perf.h                  | 85 +++++++++++++++++++++++++++++++-
> lib/meson.build                  |  4 +-
> 4 files changed, 118 insertions(+), 3 deletions(-)
>
>diff --git a/lib/i915/perf-configs/codegen.py b/lib/i915/perf-configs/codegen.py
>index 8268c606..93a2df4a 100644
>--- a/lib/i915/perf-configs/codegen.py
>+++ b/lib/i915/perf-configs/codegen.py
>@@ -169,6 +169,7 @@ class Gen:
>             "$EuSlicesTotalCount": { 'c': "perf->devinfo.n_eu_slices" },
>             "$EuSubslicesTotalCount": { 'c': "perf->devinfo.n_eu_sub_slices" },
>             "$EuDualSubslicesTotalCount": { 'c': "perf->devinfo.n_eu_sub_slices" },
>+            "$EuDualSubslicesSlice0123Count": { 'c': "perf->devinfo.n_eu_sub_slices_half_slices" },
>             "$EuThreadsCount": { 'c': "perf->devinfo.eu_threads_count" },
>             "$SliceMask": { 'c': "perf->devinfo.slice_mask" },
>             "$DualSubsliceMask": { 'c': "perf->devinfo.subslice_mask" },
>diff --git a/lib/i915/perf.c b/lib/i915/perf.c
>index 05730d64..298e4d0e 100644
>--- a/lib/i915/perf.c
>+++ b/lib/i915/perf.c
>@@ -153,6 +153,10 @@ intel_perf_for_devinfo(uint32_t device_id,
> {
> 	const struct intel_device_info *devinfo = intel_get_device_info(device_id);
> 	struct intel_perf *perf;
>+	uint32_t subslice_mask_len;
>+	uint32_t eu_mask_len;
>+	uint32_t half_max_subslices;
>+	uint64_t half_subslices_mask;
> 	int bits_per_subslice;
>
> 	if (!devinfo)
>@@ -180,6 +184,25 @@ intel_perf_for_devinfo(uint32_t device_id,
> 			 "%s", devinfo->codename);
> 	}
>
>+	/* Store i915 topology. */
>+	perf->devinfo.max_slices = topology->max_slices;
>+	perf->devinfo.max_subslices_per_slice = topology->max_subslices;
>+	perf->devinfo.max_eu_per_subslice = topology->max_eus_per_subslice;
>+
>+	subslice_mask_len =
>+		topology->max_slices * topology->subslice_stride;
>+	assert(sizeof(perf->devinfo.subslice_masks) >= subslice_mask_len);
>+	memcpy(perf->devinfo.subslice_masks,
>+	       &topology->data[topology->subslice_offset],
>+	       subslice_mask_len);
>+
>+	eu_mask_len = topology->eu_stride *
>+		topology->max_subslices * topology->max_slices;
>+	assert(sizeof(perf->devinfo.eu_masks) >= eu_mask_len);
>+	memcpy(perf->devinfo.eu_masks,
>+	       &topology->data[topology->eu_offset],
>+	       eu_mask_len);
>+
> 	/* On Gen11+ the equations from the xml files expect an 8bits
> 	 * mask per subslice, versus only 3bits on prior Gens.
> 	 */
>@@ -205,6 +228,14 @@ intel_perf_for_devinfo(uint32_t device_id,
> 	perf->devinfo.n_eu_slices = __builtin_popcount(perf->devinfo.slice_mask);
> 	perf->devinfo.n_eu_sub_slices = __builtin_popcount(perf->devinfo.subslice_mask);
>
>+	/* Compute number of subslices/dualsubslices in first half of
>+	 * the GPU.
>+	 */
>+	half_max_subslices = topology->max_subslices / 2;
>+	half_subslices_mask = perf->devinfo.subslice_mask &
>+		((1 << half_max_subslices) - 1);
>+	perf->devinfo.n_eu_sub_slices_half_slices = __builtin_popcount(half_subslices_mask);
>+
> 	/* Valid on most generations except Gen9LP. */
> 	perf->devinfo.eu_threads_count = 7;
>
>diff --git a/lib/i915/perf.h b/lib/i915/perf.h
>index 6803c149..1493da47 100644
>--- a/lib/i915/perf.h
>+++ b/lib/i915/perf.h
>@@ -32,7 +32,11 @@ extern "C" {
>
> #include "igt_list.h"
>
>-struct intel_device_info;
>+#define DIV_ROUND_UP(a, b)  (((a) + (b) - 1) / (b))
>+
>+#define INTEL_DEVICE_MAX_SLICES           (6)  /* Maximum on gfx10 */
>+#define INTEL_DEVICE_MAX_SUBSLICES        (8)  /* Maximum on gfx11 */
>+#define INTEL_DEVICE_MAX_EUS_PER_SUBSLICE (16) /* Maximum on gfx12 */
>
> struct intel_perf_devinfo {
> 	char devname[20];
>@@ -77,12 +81,66 @@ struct intel_perf_devinfo {
> 	uint64_t n_eu_slices;
> 	/* Total number of subslices/dualsubslices */
> 	uint64_t n_eu_sub_slices;
>+	/* Number of subslices/dualsubslices in the first half of the
>+	 * slices.
>+	 */
>+	uint64_t n_eu_sub_slices_half_slices;
> 	/* Mask of available subslices/dualsubslices */
> 	uint64_t subslice_mask;
> 	/* Mask of available slices */
> 	uint64_t slice_mask;
> 	/* Number of threads in one EU */
> 	uint64_t eu_threads_count;
>+
>+	/**
>+	 * Maximum number of slices present on this device (can be more than
>+	 * num_slices if some slices are fused).
>+	 */
>+	uint16_t max_slices;
>+
>+	/**
>+	 * Maximum number of subslices per slice present on this device (can be more
>+	 * than the maximum value in the num_subslices[] array if some subslices are
>+	 * fused).
>+	 */
>+	uint16_t max_subslices_per_slice;
>+
>+	/**
>+	 * Stride to access subslice_masks[].
>+	 */
>+	uint16_t subslice_slice_stride;
>+
>+	/**
>+	 * Maximum number of EUs per subslice (can be more than
>+	 * num_eu_per_subslice if some EUs are fused off).
>+	 */
>+	uint16_t max_eu_per_subslice;
>+
>+	/**
>+	 * Strides to access eu_masks[].
>+	 */
>+	uint16_t eu_slice_stride;
>+	uint16_t eu_subslice_stride;
>+
>+	/**
>+	 * A bit mask of the slices available.
>+	 */
>+	uint8_t slice_masks[DIV_ROUND_UP(INTEL_DEVICE_MAX_SLICES, 8)];
>+
>+	/**
>+	 * An array of bit mask of the subslices available, use subslice_slice_stride
>+	 * to access this array.
>+	 */
>+	uint8_t subslice_masks[INTEL_DEVICE_MAX_SLICES *
>+			       DIV_ROUND_UP(INTEL_DEVICE_MAX_SUBSLICES, 8)];
>+
>+	/**
>+	 * An array of bit mask of EUs available, use eu_slice_stride &
>+	 * eu_subslice_stride to access this array.
>+	 */
>+	uint8_t eu_masks[INTEL_DEVICE_MAX_SLICES *
>+			 INTEL_DEVICE_MAX_SUBSLICES *
>+			 DIV_ROUND_UP(INTEL_DEVICE_MAX_EUS_PER_SUBSLICE, 8)];
> };
>
> typedef enum {
>@@ -232,6 +290,31 @@ struct intel_perf {
> struct drm_i915_perf_record_header;
> struct drm_i915_query_topology_info;
>
>+static inline bool
>+intel_perf_devinfo_slice_available(const struct intel_perf_devinfo *devinfo,
>+				   int slice)
>+{
>+	return (devinfo->slice_masks[slice / 8] & (1U << (slice % 8))) != 0;
>+}
>+
>+static inline bool
>+intel_perf_devinfo_subslice_available(const struct intel_perf_devinfo *devinfo,
>+				      int slice, int subslice)
>+{
>+	return (devinfo->subslice_masks[slice * devinfo->subslice_slice_stride +
>+					subslice / 8] & (1U << (subslice % 8))) != 0;
>+}
>+
>+static inline bool
>+intel_perf_devinfo_eu_available(const struct intel_perf_devinfo *devinfo,
>+				int slice, int subslice, int eu)
>+{
>+	unsigned subslice_offset = slice * devinfo->eu_slice_stride +
>+		subslice * devinfo->eu_subslice_stride;
>+
>+	return (devinfo->eu_masks[subslice_offset + eu / 8] & (1U << eu % 8)) != 0;
>+}
>+
> struct intel_perf *intel_perf_for_fd(int drm_fd);
> struct intel_perf *intel_perf_for_devinfo(uint32_t device_id,
> 					  uint32_t revision,
>diff --git a/lib/meson.build b/lib/meson.build
>index b51cf23c..b319a3c8 100644
>--- a/lib/meson.build
>+++ b/lib/meson.build
>@@ -308,7 +308,7 @@ lib_igt_i915_perf_build = shared_library(
>   dependencies: lib_igt_chipset,
>   include_directories : inc,
>   install: true,
>-  soversion: '1.4')
>+  soversion: '1.5')
>
> lib_igt_i915_perf = declare_dependency(
>   link_with : lib_igt_i915_perf_build,
>@@ -329,7 +329,7 @@ pkgconf.set('prefix', get_option('prefix'))
> pkgconf.set('exec_prefix', '${prefix}')
> pkgconf.set('libdir', '${prefix}/@0@'.format(get_option('libdir')))
> pkgconf.set('includedir', '${prefix}/@0@'.format(get_option('includedir')))
>-pkgconf.set('i915_perf_version', '1.4.0')
>+pkgconf.set('i915_perf_version', '1.5.0')
>
> configure_file(
>   input : 'i915-perf.pc.in',
>-- 
>2.25.1
>

  reply	other threads:[~2022-10-10 22:59 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-10 21:41 [igt-dev] [PATCH i-g-t v6 00/36] Add DG2 OA test Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 01/36] i915/perf: Check regularly if we are done reading reports Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 02/36] i915/perf: Fix OA short_reads test Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 03/36] i915/perf: Check return value from getparam Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 04/36] i915/perf: Limit sseu-config tests for gen11 Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 05/36] i915/perf: Account for OA sampling interval in polling test Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 06/36] i915/perf: Define OA report types and fix oa-formats test Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 07/36] i915/perf: Use ARRAY_SIZE consistently for num_properties Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 08/36] i915/perf: Use gt in perf tests and lib Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 09/36] i915/perf: Explicitly state rendercopy needs for a test Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 10/36] i915/perf: Skip tests that use rendercopy Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 11/36] i915/perf: Add OA formats for DG2 Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 12/36] i915/perf: Fix CS timestamp vs OA timstamp mismatch Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 13/36] i915/perf: Wait longer for rc6 residency in DG2 Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 14/36] lib/i915/perf: implement report accumulation for new format Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 15/36] lib/i915/perf: fixup conversion script for XEHPSDV Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 16/36] lib/i915/perf: make warning message more helpful Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 17/36] lib/i915/perf: expose new operators for codegen Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 18/36] tools/i915-perf-recorder: add ability to select device Umesh Nerlige Ramappa
2022-10-10 22:06   ` Umesh Nerlige Ramappa
2022-10-11  6:22   ` Petri Latvala
2022-10-18 22:52     ` Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 19/36] lib/i915/perf: fixup report validity Umesh Nerlige Ramappa
2022-10-10 22:08   ` Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 20/36] lib/i915/perf: add a helper to read timestamps Umesh Nerlige Ramappa
2022-10-10 22:22   ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 21/36] lib/i915/perf: store bit shifting required for OA timestamps Umesh Nerlige Ramappa
2022-10-10 22:52   ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 22/36] lib/i915/perf: indentation fix Umesh Nerlige Ramappa
2022-10-10 22:52   ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 23/36] tools/i915-perf-recorder: capture OA & CS frequencies Umesh Nerlige Ramappa
2022-10-10 22:54   ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 24/36] tools/i915-perf: make timestamp range easier to compare Umesh Nerlige Ramappa
2022-10-10 22:54   ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 25/36] tools/i915-perf: printout CPU clock used Umesh Nerlige Ramappa
2022-10-10 22:55   ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 26/36] tools/i915-perf: record remaining perf data on exit Umesh Nerlige Ramappa
2022-10-10 22:55   ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 27/36] lib/i915/perf: add support for new EuDualSubslicesTotalCount var Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 28/36] lib/i915/perf-config: extend the device info Umesh Nerlige Ramappa
2022-10-10 22:57   ` Umesh Nerlige Ramappa [this message]
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 29/36] i915/perf: update import script Umesh Nerlige Ramappa
2022-10-10 23:00   ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 30/36] lib/i915/perf: add a raw timestamp utility Umesh Nerlige Ramappa
2022-10-10 23:00   ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 31/36] lib/i915/perf: add helper function to get report reason Umesh Nerlige Ramappa
2022-10-10 23:02   ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 32/36] tools/i915-perf: add option to printout reports data Umesh Nerlige Ramappa
2022-10-10 23:03   ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 33/36] lib/i915: prepare codegen for new ACM/DG2 variables Umesh Nerlige Ramappa
2022-10-10 23:04   ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 34/36] lib/i915/perf: Add ACM GT1 metrics Umesh Nerlige Ramappa
2022-10-18 22:49   ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 35/36] lib/i915/perf: Add ACM GT2 metrics Umesh Nerlige Ramappa
2022-10-18 22:49   ` Umesh Nerlige Ramappa
2022-10-10 22:01 ` [igt-dev] [PATCH i-g-t v6 00/36] Add DG2 OA test Umesh Nerlige Ramappa

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Y0Sjd9GjZJVZIML9@unerlige-ril \
    --to=umesh.nerlige.ramappa@intel.com \
    --cc=ashutosh.dixit@intel.com \
    --cc=igt-dev@lists.freedesktop.org \
    --cc=lionel.g.landwerlin@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox