From: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
To: <igt-dev@lists.freedesktop.org>,
Lionel G Landwerlin <lionel.g.landwerlin@intel.com>,
Ashutosh Dixit <ashutosh.dixit@intel.com>
Subject: Re: [igt-dev] [PATCH i-g-t v6 21/36] lib/i915/perf: store bit shifting required for OA timestamps
Date: Mon, 10 Oct 2022 15:52:00 -0700 [thread overview]
Message-ID: <Y0SiECRbV6qDIeOH@unerlige-ril> (raw)
In-Reply-To: <20221010214215.5378-22-umesh.nerlige.ramappa@intel.com>
some nits, but lgtm,
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Umesh
On Mon, Oct 10, 2022 at 09:42:00PM +0000, Umesh Nerlige Ramappa wrote:
>From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>
>Some new platforms have the OA timestamps shifted to the right by a
>number of bits. This is configurable but since i915 appears to leave
>the default values, let's just hardcode this in the device info.
>
>Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>---
> lib/i915/perf.c | 31 +++++++++++++++++++++++++-----
> lib/i915/perf.h | 18 ++++++++++++++++-
> lib/i915/perf_data_reader.c | 2 +-
> lib/meson.build | 4 ++--
> tools/i915-perf/i915_perf_reader.c | 3 ++-
> 5 files changed, 48 insertions(+), 10 deletions(-)
>
>diff --git a/lib/i915/perf.c b/lib/i915/perf.c
>index 82f08b36..05730d64 100644
>--- a/lib/i915/perf.c
>+++ b/lib/i915/perf.c
>@@ -208,6 +208,10 @@ intel_perf_for_devinfo(uint32_t device_id,
> /* Valid on most generations except Gen9LP. */
> perf->devinfo.eu_threads_count = 7;
>
>+ /* Most platforms have full 32bit timestamps. */
>+ perf->devinfo.oa_timestamp_mask = 0xffffffff;
>+ perf->devinfo.oa_timestamp_shift = 0;
>+
> if (devinfo->is_haswell) {
> intel_perf_load_metrics_hsw(perf);
> } else if (devinfo->is_broadwell) {
>@@ -663,7 +667,8 @@ accumulate_uint40(int a_index,
> }
>
> void intel_perf_accumulate_reports(struct intel_perf_accumulator *acc,
>- int oa_format,
>+ const struct intel_perf *perf,
>+ const struct intel_perf_metric_set *metric_set,
> const struct drm_i915_perf_record_header *record0,
> const struct drm_i915_perf_record_header *record1)
> {
>@@ -675,9 +680,13 @@ void intel_perf_accumulate_reports(struct intel_perf_accumulator *acc,
>
> memset(acc, 0, sizeof(*acc));
>
>- switch (oa_format) {
>+ switch (metric_set->perf_oa_format) {
> case I915_OA_FORMAT_A24u40_A14u32_B8_C8:
>- accumulate_uint32(start + 1, end + 1, deltas + idx++); /* timestamp */
>+ /* timestamp */
>+ if (perf->devinfo.oa_timestamp_shift >= 0)
>+ deltas[idx++] += (end[1] - start[1]) << perf->devinfo.oa_timestamp_shift;
>+ else
>+ deltas[idx++] += (end[1] - start[1]) >> (-perf->devinfo.oa_timestamp_shift);
I am guessing the negative numbers are used to shift right and positive
for shift left.
> accumulate_uint32(start + 3, end + 3, deltas + idx++); /* clock */
>
> /* 4x 32bit A0-3 counters... */
>@@ -710,7 +719,10 @@ void intel_perf_accumulate_reports(struct intel_perf_accumulator *acc,
>
> case I915_OAR_FORMAT_A32u40_A4u32_B8_C8:
> case I915_OA_FORMAT_A32u40_A4u32_B8_C8:
>- accumulate_uint32(start + 1, end + 1, deltas + idx++); /* timestamp */
>+ if (perf->devinfo.oa_timestamp_shift >= 0)
>+ deltas[idx++] += (end[1] - start[1]) << perf->devinfo.oa_timestamp_shift;
>+ else
>+ deltas[idx++] += (end[1] - start[1]) >> (-perf->devinfo.oa_timestamp_shift);
These could use a helper ^.
> accumulate_uint32(start + 3, end + 3, deltas + idx++); /* clock */
>
> /* 32x 40bit A counters... */
>@@ -727,7 +739,11 @@ void intel_perf_accumulate_reports(struct intel_perf_accumulator *acc,
> break;
>
> case I915_OA_FORMAT_A45_B8_C8:
>- accumulate_uint32(start + 1, end + 1, deltas); /* timestamp */
>+ /* timestamp */
>+ if (perf->devinfo.oa_timestamp_shift >= 0)
>+ deltas[0] += (end[1] - start[1]) << perf->devinfo.oa_timestamp_shift;
>+ else
>+ deltas[0] += (end[1] - start[1]) >> (-perf->devinfo.oa_timestamp_shift);
>
> for (i = 0; i < 61; i++)
> accumulate_uint32(start + 3 + i, end + 3 + i, deltas + 1 + i);
>@@ -756,5 +772,10 @@ uint64_t intel_perf_read_record_timestamp(const struct intel_perf *perf,
> assert(0);
> }
>
>+ if (perf->devinfo.oa_timestamp_shift >= 0)
>+ ts <<= perf->devinfo.oa_timestamp_shift;
>+ else
>+ ts >>= -perf->devinfo.oa_timestamp_shift;
>+
> return ts;
> }
>diff --git a/lib/i915/perf.h b/lib/i915/perf.h
>index e3848b7e..6803c149 100644
>--- a/lib/i915/perf.h
>+++ b/lib/i915/perf.h
>@@ -52,6 +52,21 @@ struct intel_perf_devinfo {
> uint32_t devid;
> uint32_t graphics_ver;
> uint32_t revision;
>+ /**
>+ * Bit shifting required to put OA report timestamps into
>+ * timestamp_frequency (some HW generations can shift
>+ * timestamp values to the right by a number of bits).
>+ */
>+ int32_t oa_timestamp_shift;
>+ /**
>+ * On some platforms only part of the timestamp bits are valid
>+ * (on previous platforms we would get full 32bits, newer
>+ * platforms can have fewer). It's important to know when
>+ * correlating the full 36bits timestamps to the OA report
>+ * timestamps.
>+ */
>+ uint64_t oa_timestamp_mask;
>+ /* Frequency of the timestamps in Hz */
> uint64_t timestamp_frequency;
> uint64_t gt_min_freq;
> uint64_t gt_max_freq;
>@@ -236,7 +251,8 @@ void intel_perf_add_metric_set(struct intel_perf *perf,
> void intel_perf_load_perf_configs(struct intel_perf *perf, int drm_fd);
>
> void intel_perf_accumulate_reports(struct intel_perf_accumulator *acc,
>- int oa_format,
>+ const struct intel_perf *perf,
>+ const struct intel_perf_metric_set *metric_set,
> const struct drm_i915_perf_record_header *record0,
> const struct drm_i915_perf_record_header *record1);
>
>diff --git a/lib/i915/perf_data_reader.c b/lib/i915/perf_data_reader.c
>index 4d1efe92..fcdd5ed1 100644
>--- a/lib/i915/perf_data_reader.c
>+++ b/lib/i915/perf_data_reader.c
>@@ -209,7 +209,7 @@ correlate_gpu_timestamp(struct intel_perf_data_reader *reader,
> * Try to figure what portion of the correlation data the
> * 32bit timestamp belongs to.
> */
>- uint64_t mask = 0xffffffff;
>+ uint64_t mask = reader->perf->devinfo.oa_timestamp_mask;
> int corr_idx = -1;
>
> for (uint32_t i = 0; i < reader->n_correlation_chunks; i++) {
>diff --git a/lib/meson.build b/lib/meson.build
>index 2e5adbae..b51cf23c 100644
>--- a/lib/meson.build
>+++ b/lib/meson.build
>@@ -308,7 +308,7 @@ lib_igt_i915_perf_build = shared_library(
> dependencies: lib_igt_chipset,
> include_directories : inc,
> install: true,
>- soversion: '1')
>+ soversion: '1.4')
>
> lib_igt_i915_perf = declare_dependency(
> link_with : lib_igt_i915_perf_build,
>@@ -329,7 +329,7 @@ pkgconf.set('prefix', get_option('prefix'))
> pkgconf.set('exec_prefix', '${prefix}')
> pkgconf.set('libdir', '${prefix}/@0@'.format(get_option('libdir')))
> pkgconf.set('includedir', '${prefix}/@0@'.format(get_option('includedir')))
>-pkgconf.set('i915_perf_version', '1.3.0')
>+pkgconf.set('i915_perf_version', '1.4.0')
>
> configure_file(
> input : 'i915-perf.pc.in',
>diff --git a/tools/i915-perf/i915_perf_reader.c b/tools/i915-perf/i915_perf_reader.c
>index afd8c39c..4a2e3df4 100644
>--- a/tools/i915-perf/i915_perf_reader.c
>+++ b/tools/i915-perf/i915_perf_reader.c
>@@ -271,7 +271,8 @@ main(int argc, char *argv[])
> fprintf(stdout, "hw_id=0x%x %s\n",
> item->hw_id, item->hw_id == 0xffffffff ? "(idle)" : "");
>
>- intel_perf_accumulate_reports(&accu, reader.metric_set->perf_oa_format,
>+ intel_perf_accumulate_reports(&accu,
>+ reader.perf, reader.metric_set,
> i915_report0, i915_report1);
>
> for (uint32_t c = 0; c < n_counters; c++) {
>--
>2.25.1
>
next prev parent reply other threads:[~2022-10-10 22:52 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-10 21:41 [igt-dev] [PATCH i-g-t v6 00/36] Add DG2 OA test Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 01/36] i915/perf: Check regularly if we are done reading reports Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 02/36] i915/perf: Fix OA short_reads test Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 03/36] i915/perf: Check return value from getparam Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 04/36] i915/perf: Limit sseu-config tests for gen11 Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 05/36] i915/perf: Account for OA sampling interval in polling test Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 06/36] i915/perf: Define OA report types and fix oa-formats test Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 07/36] i915/perf: Use ARRAY_SIZE consistently for num_properties Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 08/36] i915/perf: Use gt in perf tests and lib Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 09/36] i915/perf: Explicitly state rendercopy needs for a test Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 10/36] i915/perf: Skip tests that use rendercopy Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 11/36] i915/perf: Add OA formats for DG2 Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 12/36] i915/perf: Fix CS timestamp vs OA timstamp mismatch Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 13/36] i915/perf: Wait longer for rc6 residency in DG2 Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 14/36] lib/i915/perf: implement report accumulation for new format Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 15/36] lib/i915/perf: fixup conversion script for XEHPSDV Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 16/36] lib/i915/perf: make warning message more helpful Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 17/36] lib/i915/perf: expose new operators for codegen Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 18/36] tools/i915-perf-recorder: add ability to select device Umesh Nerlige Ramappa
2022-10-10 22:06 ` Umesh Nerlige Ramappa
2022-10-11 6:22 ` Petri Latvala
2022-10-18 22:52 ` Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 19/36] lib/i915/perf: fixup report validity Umesh Nerlige Ramappa
2022-10-10 22:08 ` Umesh Nerlige Ramappa
2022-10-10 21:41 ` [igt-dev] [PATCH i-g-t v6 20/36] lib/i915/perf: add a helper to read timestamps Umesh Nerlige Ramappa
2022-10-10 22:22 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 21/36] lib/i915/perf: store bit shifting required for OA timestamps Umesh Nerlige Ramappa
2022-10-10 22:52 ` Umesh Nerlige Ramappa [this message]
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 22/36] lib/i915/perf: indentation fix Umesh Nerlige Ramappa
2022-10-10 22:52 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 23/36] tools/i915-perf-recorder: capture OA & CS frequencies Umesh Nerlige Ramappa
2022-10-10 22:54 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 24/36] tools/i915-perf: make timestamp range easier to compare Umesh Nerlige Ramappa
2022-10-10 22:54 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 25/36] tools/i915-perf: printout CPU clock used Umesh Nerlige Ramappa
2022-10-10 22:55 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 26/36] tools/i915-perf: record remaining perf data on exit Umesh Nerlige Ramappa
2022-10-10 22:55 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 27/36] lib/i915/perf: add support for new EuDualSubslicesTotalCount var Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 28/36] lib/i915/perf-config: extend the device info Umesh Nerlige Ramappa
2022-10-10 22:57 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 29/36] i915/perf: update import script Umesh Nerlige Ramappa
2022-10-10 23:00 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 30/36] lib/i915/perf: add a raw timestamp utility Umesh Nerlige Ramappa
2022-10-10 23:00 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 31/36] lib/i915/perf: add helper function to get report reason Umesh Nerlige Ramappa
2022-10-10 23:02 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 32/36] tools/i915-perf: add option to printout reports data Umesh Nerlige Ramappa
2022-10-10 23:03 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 33/36] lib/i915: prepare codegen for new ACM/DG2 variables Umesh Nerlige Ramappa
2022-10-10 23:04 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 34/36] lib/i915/perf: Add ACM GT1 metrics Umesh Nerlige Ramappa
2022-10-18 22:49 ` Umesh Nerlige Ramappa
2022-10-10 21:42 ` [igt-dev] [PATCH i-g-t v6 35/36] lib/i915/perf: Add ACM GT2 metrics Umesh Nerlige Ramappa
2022-10-18 22:49 ` Umesh Nerlige Ramappa
2022-10-10 22:01 ` [igt-dev] [PATCH i-g-t v6 00/36] Add DG2 OA test Umesh Nerlige Ramappa
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Y0SiECRbV6qDIeOH@unerlige-ril \
--to=umesh.nerlige.ramappa@intel.com \
--cc=ashutosh.dixit@intel.com \
--cc=igt-dev@lists.freedesktop.org \
--cc=lionel.g.landwerlin@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox