From: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
To: <igt-dev@lists.freedesktop.org>,
Lionel G Landwerlin <lionel.g.landwerlin@intel.com>,
Ashutosh Dixit <ashutosh.dixit@intel.com>
Subject: Re: [igt-dev] [PATCH i-g-t v6 28/36] lib/i915/perf-config: extend the device info
Date: Mon, 10 Oct 2022 15:57:59 -0700 [thread overview]
Message-ID: <Y0Sjd9GjZJVZIML9@unerlige-ril> (raw)
In-Reply-To: <20221010214215.5378-29-umesh.nerlige.ramappa@intel.com>
On Mon, Oct 10, 2022 at 09:42:07PM +0000, Umesh Nerlige Ramappa wrote:
>From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>
>This will allow equations to check for finer information on the
>topology. Also add EuDualSubslicesSlice0123Count.
>
>Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>---
> lib/i915/perf-configs/codegen.py | 1 +
> lib/i915/perf.c | 31 ++++++++++++
> lib/i915/perf.h | 85 +++++++++++++++++++++++++++++++-
> lib/meson.build | 4 +-
> 4 files changed, 118 insertions(+), 3 deletions(-)
>
>diff --git a/lib/i915/perf-configs/codegen.py b/lib/i915/perf-configs/codegen.py
>index 8268c606..93a2df4a 100644
>--- a/lib/i915/perf-configs/codegen.py
>+++ b/lib/i915/perf-configs/codegen.py
>@@ -169,6 +169,7 @@ class Gen:
> "$EuSlicesTotalCount": { 'c': "perf->devinfo.n_eu_slices" },
> "$EuSubslicesTotalCount": { 'c': "perf->devinfo.n_eu_sub_slices" },
> "$EuDualSubslicesTotalCount": { 'c': "perf->devinfo.n_eu_sub_slices" },
>+ "$EuDualSubslicesSlice0123Count": { 'c': "perf->devinfo.n_eu_sub_slices_half_slices" },
> "$EuThreadsCount": { 'c': "perf->devinfo.eu_threads_count" },
> "$SliceMask": { 'c': "perf->devinfo.slice_mask" },
> "$DualSubsliceMask": { 'c': "perf->devinfo.subslice_mask" },
>diff --git a/lib/i915/perf.c b/lib/i915/perf.c
>index 05730d64..298e4d0e 100644
>--- a/lib/i915/perf.c
>+++ b/lib/i915/perf.c
>@@ -153,6 +153,10 @@ intel_perf_for_devinfo(uint32_t device_id,
> {
> const struct intel_device_info *devinfo = intel_get_device_info(device_id);
> struct intel_perf *perf;
>+ uint32_t subslice_mask_len;
>+ uint32_t eu_mask_len;
>+ uint32_t half_max_subslices;
>+ uint64_t half_subslices_mask;
> int bits_per_subslice;
>
> if (!devinfo)
>@@ -180,6 +184,25 @@ intel_perf_for_devinfo(uint32_t device_id,
> "%s", devinfo->codename);
> }
>
>+ /* Store i915 topology. */
>+ perf->devinfo.max_slices = topology->max_slices;
>+ perf->devinfo.max_subslices_per_slice = topology->max_subslices;
>+ perf->devinfo.max_eu_per_subslice = topology->max_eus_per_subslice;
>+
>+ subslice_mask_len =
>+ topology->max_slices * topology->subslice_stride;
>+ assert(sizeof(perf->devinfo.subslice_masks) >= subslice_mask_len);
>+ memcpy(perf->devinfo.subslice_masks,
>+ &topology->data[topology->subslice_offset],
>+ subslice_mask_len);
>+
>+ eu_mask_len = topology->eu_stride *
>+ topology->max_subslices * topology->max_slices;
>+ assert(sizeof(perf->devinfo.eu_masks) >= eu_mask_len);
>+ memcpy(perf->devinfo.eu_masks,
>+ &topology->data[topology->eu_offset],
>+ eu_mask_len);
>+
> /* On Gen11+ the equations from the xml files expect an 8bits
> * mask per subslice, versus only 3bits on prior Gens.
> */
>@@ -205,6 +228,14 @@ intel_perf_for_devinfo(uint32_t device_id,
> perf->devinfo.n_eu_slices = __builtin_popcount(perf->devinfo.slice_mask);
> perf->devinfo.n_eu_sub_slices = __builtin_popcount(perf->devinfo.subslice_mask);
>
>+ /* Compute number of subslices/dualsubslices in first half of
>+ * the GPU.
>+ */
>+ half_max_subslices = topology->max_subslices / 2;
>+ half_subslices_mask = perf->devinfo.subslice_mask &
>+ ((1 << half_max_subslices) - 1);
>+ perf->devinfo.n_eu_sub_slices_half_slices = __builtin_popcount(half_subslices_mask);
>+
> /* Valid on most generations except Gen9LP. */
> perf->devinfo.eu_threads_count = 7;
>
>diff --git a/lib/i915/perf.h b/lib/i915/perf.h
>index 6803c149..1493da47 100644
>--- a/lib/i915/perf.h
>+++ b/lib/i915/perf.h
>@@ -32,7 +32,11 @@ extern "C" {
>
> #include "igt_list.h"
>
>-struct intel_device_info;
>+#define DIV_ROUND_UP(a, b) (((a) + (b) - 1) / (b))
>+
>+#define INTEL_DEVICE_MAX_SLICES (6) /* Maximum on gfx10 */
>+#define INTEL_DEVICE_MAX_SUBSLICES (8) /* Maximum on gfx11 */
>+#define INTEL_DEVICE_MAX_EUS_PER_SUBSLICE (16) /* Maximum on gfx12 */
>
> struct intel_perf_devinfo {
> char devname[20];
>@@ -77,12 +81,66 @@ struct intel_perf_devinfo {
> uint64_t n_eu_slices;
> /* Total number of subslices/dualsubslices */
> uint64_t n_eu_sub_slices;
>+ /* Number of subslices/dualsubslices in the first half of the
>+ * slices.
>+ */
>+ uint64_t n_eu_sub_slices_half_slices;
> /* Mask of available subslices/dualsubslices */
> uint64_t subslice_mask;
> /* Mask of available slices */
> uint64_t slice_mask;
> /* Number of threads in one EU */
> uint64_t eu_threads_count;
>+
>+ /**
>+ * Maximum number of slices present on this device (can be more than
>+ * num_slices if some slices are fused).
>+ */
>+ uint16_t max_slices;
>+
>+ /**
>+ * Maximum number of subslices per slice present on this device (can be more
>+ * than the maximum value in the num_subslices[] array if some subslices are
>+ * fused).
>+ */
>+ uint16_t max_subslices_per_slice;
>+
>+ /**
>+ * Stride to access subslice_masks[].
>+ */
>+ uint16_t subslice_slice_stride;
>+
>+ /**
>+ * Maximum number of EUs per subslice (can be more than
>+ * num_eu_per_subslice if some EUs are fused off).
>+ */
>+ uint16_t max_eu_per_subslice;
>+
>+ /**
>+ * Strides to access eu_masks[].
>+ */
>+ uint16_t eu_slice_stride;
>+ uint16_t eu_subslice_stride;
>+
>+ /**
>+ * A bit mask of the slices available.
>+ */
>+ uint8_t slice_masks[DIV_ROUND_UP(INTEL_DEVICE_MAX_SLICES, 8)];
>+
>+ /**
>+ * An array of bit mask of the subslices available, use subslice_slice_stride
>+ * to access this array.
>+ */
>+ uint8_t subslice_masks[INTEL_DEVICE_MAX_SLICES *
>+ DIV_ROUND_UP(INTEL_DEVICE_MAX_SUBSLICES, 8)];
>+
>+ /**
>+ * An array of bit mask of EUs available, use eu_slice_stride &
>+ * eu_subslice_stride to access this array.
>+ */
>+ uint8_t eu_masks[INTEL_DEVICE_MAX_SLICES *
>+ INTEL_DEVICE_MAX_SUBSLICES *
>+ DIV_ROUND_UP(INTEL_DEVICE_MAX_EUS_PER_SUBSLICE, 8)];
> };
>
> typedef enum {
>@@ -232,6 +290,31 @@ struct intel_perf {
> struct drm_i915_perf_record_header;
> struct drm_i915_query_topology_info;
>
>+static inline bool
>+intel_perf_devinfo_slice_available(const struct intel_perf_devinfo *devinfo,
>+ int slice)
>+{
>+ return (devinfo->slice_masks[slice / 8] & (1U << (slice % 8))) != 0;
>+}
>+
>+static inline bool
>+intel_perf_devinfo_subslice_available(const struct intel_perf_devinfo *devinfo,
>+ int slice, int subslice)
>+{
>+ return (devinfo->subslice_masks[slice * devinfo->subslice_slice_stride +
>+ subslice / 8] & (1U << (subslice % 8))) != 0;
>+}
>+
>+static inline bool
>+intel_perf_devinfo_eu_available(const struct intel_perf_devinfo *devinfo,
>+ int slice, int subslice, int eu)
>+{
>+ unsigned subslice_offset = slice * devinfo->eu_slice_stride +
>+ subslice * devinfo->eu_subslice_stride;
>+
>+ return (devinfo->eu_masks[subslice_offset + eu / 8] & (1U << eu % 8)) != 0;
>+}
>+
> struct intel_perf *intel_perf_for_fd(int drm_fd);
> struct intel_perf *intel_perf_for_devinfo(uint32_t device_id,
> uint32_t revision,
>diff --git a/lib/meson.build b/lib/meson.build
>index b51cf23c..b319a3c8 100644
>--- a/lib/meson.build
>+++ b/lib/meson.build
>@@ -308,7 +308,7 @@ lib_igt_i915_perf_build = shared_library(
> dependencies: lib_igt_chipset,
> include_directories : inc,
> install: true,
>- soversion: '1.4')
>+ soversion: '1.5')
>
> lib_igt_i915_perf = declare_dependency(
> link_with : lib_igt_i915_perf_build,
>@@ -329,7 +329,7 @@ pkgconf.set('prefix', get_option('prefix'))
> pkgconf.set('exec_prefix', '${prefix}')
> pkgconf.set('libdir', '${prefix}/@0@'.format(get_option('libdir')))
> pkgconf.set('includedir', '${prefix}/@0@'.format(get_option('includedir')))
>-pkgconf.set('i915_perf_version', '1.4.0')
>+pkgconf.set('i915_perf_version', '1.5.0')
>
> configure_file(
> input : 'i915-perf.pc.in',
>--
>2.25.1
>
next prev parent reply other threads:[~2022-10-10 22:59 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-10 21:41 [igt-dev] [PATCH i-g-t v6 00/36] Add DG2 OA test Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 01/36] i915/perf: Check regularly if we are done reading reports Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 02/36] i915/perf: Fix OA short_reads test Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 03/36] i915/perf: Check return value from getparam Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 04/36] i915/perf: Limit sseu-config tests for gen11 Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 05/36] i915/perf: Account for OA sampling interval in polling test Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 06/36] i915/perf: Define OA report types and fix oa-formats test Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 07/36] i915/perf: Use ARRAY_SIZE consistently for num_properties Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 08/36] i915/perf: Use gt in perf tests and lib Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 09/36] i915/perf: Explicitly state rendercopy needs for a test Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 10/36] i915/perf: Skip tests that use rendercopy Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 11/36] i915/perf: Add OA formats for DG2 Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 12/36] i915/perf: Fix CS timestamp vs OA timstamp mismatch Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 13/36] i915/perf: Wait longer for rc6 residency in DG2 Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 14/36] lib/i915/perf: implement report accumulation for new format Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 15/36] lib/i915/perf: fixup conversion script for XEHPSDV Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 16/36] lib/i915/perf: make warning message more helpful Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 17/36] lib/i915/perf: expose new operators for codegen Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 18/36] tools/i915-perf-recorder: add ability to select device Umesh Nerlige Ramappa
2022-10-10 22:06 ` Umesh Nerlige Ramappa
2022-10-11 6:22 ` Petri Latvala
2022-10-18 22:52 ` Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 19/36] lib/i915/perf: fixup report validity Umesh Nerlige Ramappa
2022-10-10 22:08 ` Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 20/36] lib/i915/perf: add a helper to read timestamps Umesh Nerlige Ramappa
2022-10-10 22:22 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 21/36] lib/i915/perf: store bit shifting required for OA timestamps Umesh Nerlige Ramappa
2022-10-10 22:52 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 22/36] lib/i915/perf: indentation fix Umesh Nerlige Ramappa
2022-10-10 22:52 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 23/36] tools/i915-perf-recorder: capture OA & CS frequencies Umesh Nerlige Ramappa
2022-10-10 22:54 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 24/36] tools/i915-perf: make timestamp range easier to compare Umesh Nerlige Ramappa
2022-10-10 22:54 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 25/36] tools/i915-perf: printout CPU clock used Umesh Nerlige Ramappa
2022-10-10 22:55 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 26/36] tools/i915-perf: record remaining perf data on exit Umesh Nerlige Ramappa
2022-10-10 22:55 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 27/36] lib/i915/perf: add support for new EuDualSubslicesTotalCount var Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 28/36] lib/i915/perf-config: extend the device info Umesh Nerlige Ramappa
2022-10-10 22:57 ` Umesh Nerlige Ramappa [this message]
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 29/36] i915/perf: update import script Umesh Nerlige Ramappa
2022-10-10 23:00 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 30/36] lib/i915/perf: add a raw timestamp utility Umesh Nerlige Ramappa
2022-10-10 23:00 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 31/36] lib/i915/perf: add helper function to get report reason Umesh Nerlige Ramappa
2022-10-10 23:02 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 32/36] tools/i915-perf: add option to printout reports data Umesh Nerlige Ramappa
2022-10-10 23:03 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 33/36] lib/i915: prepare codegen for new ACM/DG2 variables Umesh Nerlige Ramappa
2022-10-10 23:04 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 34/36] lib/i915/perf: Add ACM GT1 metrics Umesh Nerlige Ramappa
2022-10-18 22:49 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 35/36] lib/i915/perf: Add ACM GT2 metrics Umesh Nerlige Ramappa
2022-10-18 22:49 ` Umesh Nerlige Ramappa
2022-10-10 22:01 ` [igt-dev] [PATCH i-g-t v6 00/36] Add DG2 OA test Umesh Nerlige Ramappa
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Y0Sjd9GjZJVZIML9@unerlige-ril \
--to=umesh.nerlige.ramappa@intel.com \
--cc=ashutosh.dixit@intel.com \
--cc=igt-dev@lists.freedesktop.org \
--cc=lionel.g.landwerlin@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.