All of lore.kernel.org
 help / color / mirror / Atom feed
From: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
To: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: igt-dev@lists.freedesktop.org
Subject: Re: [igt-dev] [PATCH i-g-t 16/23] i915/perf: Treat ticks as 64 bit
Date: Tue, 6 Sep 2022 11:56:12 -0700	[thread overview]
Message-ID: <YxeXzC2JjhqApkPY@unerlige-ril> (raw)
In-Reply-To: <db5a5463-18c7-3a2a-9ecb-b07279b408c7@intel.com>

On Tue, Sep 06, 2022 at 05:08:50PM +0300, Lionel Landwerlin wrote:
>On 23/08/2022 21:30, Umesh Nerlige Ramappa wrote:
>>DG2 A0 has a buggy header in the 64 bit OAG format. Except the ticks,
>>all other fields are 32 bit. To resolve this, we will store ticks in 64
>>bit type.
>>
>>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>>---
>>  tests/i915/perf.c | 82 ++++++++++++++++++++++++++++++++---------------
>>  1 file changed, 56 insertions(+), 26 deletions(-)
>>
>>diff --git a/tests/i915/perf.c b/tests/i915/perf.c
>>index 3eb31f3b..8836da66 100644
>>--- a/tests/i915/perf.c
>>+++ b/tests/i915/perf.c
>>@@ -117,6 +117,7 @@ struct oa_format {
>>  	int c_off;
>>  	int n_c;
>>  	int oa_type;
>>+	bool report_hdr_64bit;
>>  };
>>  static struct oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
>>@@ -203,7 +204,8 @@ static struct oa_format dg2_oa_formats[I915_OA_FORMAT_MAX] = {
>>  		"A38u64_R2u64_B8_C8", .size = 448,
>>  		.a64_off = 32, .n_a64 = 38,
>>  		.b_off = 352, .n_b = 8,
>>-		.c_off = 384, .n_c = 8, .oa_type = OAG, },
>>+		.c_off = 384, .n_c = 8, .oa_type = OAG,
>>+		.report_hdr_64bit = true, },
>>  	[I915_OAR_FORMAT_A36u64_B8_C8] = {
>>  		"A36u64_B8_C8", .size = 384,
>>  		.a64_off = 32, .n_a64 = 36,
>>@@ -245,7 +247,7 @@ static bool *undefined_a_counters;
>>  static uint64_t oa_exp_1_millisec;
>>  static igt_render_copyfunc_t render_copy = NULL;
>>-static uint32_t (*read_report_ticks)(const uint32_t *report,
>>+static uint64_t (*read_report_ticks)(const uint32_t *report,
>>  				     enum drm_i915_oa_format format);
>>  static void (*sanity_check_reports)(const uint32_t *oa_report0,
>>  				    const uint32_t *oa_report1,
>>@@ -395,7 +397,7 @@ sysfs_read(enum i915_attr_id id)
>>   * C2 corresponds to a clock counter for the Haswell render basic metric set
>>   * but it's not included in all of the formats.
>>   */
>>-static uint32_t
>>+static uint64_t
>>  hsw_read_report_ticks(const uint32_t *report, enum drm_i915_oa_format format)
>>  {
>>  	uint32_t *c = (uint32_t *)(((uint8_t *)report) + get_oa_format(format).c_off);
>>@@ -405,10 +407,41 @@ hsw_read_report_ticks(const uint32_t *report, enum drm_i915_oa_format format)
>>  	return c[2];
>>  }
>>-static uint32_t
>>+static uint64_t
>>  gen8_read_report_ticks(const uint32_t *report, enum drm_i915_oa_format format)
>>  {
>>-	return report[3];
>>+
>>+	struct oa_format fmt = get_oa_format(format);
>>+
>>+	return fmt.report_hdr_64bit ? *(uint64_t *)&report[6] : report[3];
>>+}
>>+
>>+/*
>>+ * t0 is a value sampled before t1. width is number of bits used to represent
>>+ * t0/t1. Normally t1 is greater than t0. In cases where t1 < t0 use this
>>+ * helper. Since the size of t1/t0 is already 64 bits, no special handling is
>>+ * needed for width = 64.
>>+ */
>>+static uint64_t
>>+__elapsed_delta(uint64_t t1, uint64_t t0, uint32_t width)
>>+{
>>+	uint32_t max_bits = sizeof(t1) * 8;
>>+
>>+	igt_assert(width <= max_bits);
>>+
>>+	if (t1 < t0 && width != max_bits)
>>+		return ((1ULL << width) - t0) + t1;
>
>
>Should this be : ((1ULL << width) + t1) - t0 ?
>

Both ways should yield the same result.

>
>Since we know the t1 wrapped around the counter's max bit size, the 
>actual difference is the max value + whatever is in the counter minus 
>t0.
>
>
>>+
>>+	return t1 - t0;
>>+}
>>+
>>+static uint64_t
>>+oa_tick_delta(const uint32_t *report1,
>>+	      const uint32_t *report0,
>>+	      enum drm_i915_oa_format format)
>>+{
>>+	return __elapsed_delta(read_report_ticks(report1, format),
>>+			       read_report_ticks(report0, format), 32);
>>  }
>>  static void
>>@@ -638,8 +671,8 @@ hsw_sanity_check_render_basic_reports(const uint32_t *oa_report0,
>>  				      enum drm_i915_oa_format fmt)
>>  {
>>  	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
>>-	uint32_t clock_delta;
>>-	uint32_t max_delta;
>>+	uint64_t clock_delta;
>>+	uint64_t max_delta;
>>  	struct oa_format format = get_oa_format(fmt);
>>  	igt_assert_neq(time_delta, 0);
>>@@ -654,21 +687,19 @@ hsw_sanity_check_render_basic_reports(const uint32_t *oa_report0,
>>  		clock_delta = (gt_max_freq_mhz *
>>  			       (uint64_t)time_delta) / 1000;
>>  	} else {
>>-		uint32_t ticks0 = read_report_ticks(oa_report0, fmt);
>>-		uint32_t ticks1 = read_report_ticks(oa_report1, fmt);
>>  		uint64_t freq;
>>-		clock_delta = ticks1 - ticks0;
>>+		clock_delta = oa_tick_delta(oa_report1, oa_report0, fmt);
>>-		igt_assert_neq(clock_delta, 0);
>>+		igt_assert_neq_u64(clock_delta, 0);
>>-		freq = ((uint64_t)clock_delta * 1000) / time_delta;
>>+		freq = (clock_delta * 1000) / time_delta;
>>  		igt_debug("freq = %"PRIu64"\n", freq);
>>  		igt_assert(freq <= gt_max_freq_mhz);
>>  	}
>>-	igt_debug("clock delta = %"PRIu32"\n", clock_delta);
>>+	igt_debug("clock delta = %"PRIu64"\n", clock_delta);
>>  	/* The maximum rate for any HSW counter =
>>  	 *   clock_delta * N EUs
>>@@ -801,7 +832,8 @@ accumulate_reports(struct accumulator *accumulator,
>>  		accumulate_uint32(4, start, end, deltas + idx++);
>>  		/* clock cycles */
>>-		accumulate_uint32(12, start, end, deltas + idx++);
>>+		deltas[idx] += oa_tick_delta(end, start, accumulator->format);
>>+		idx++;
>>  	} else {
>>  		/* timestamp */
>>  		accumulate_uint32(4, start, end, deltas + idx++);
>>@@ -874,10 +906,8 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
>>  {
>>  	struct oa_format format = get_oa_format(fmt);
>>  	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
>>-	uint32_t ticks0 = read_report_ticks(oa_report0, fmt);
>>-	uint32_t ticks1 = read_report_ticks(oa_report1, fmt);
>>-	uint32_t clock_delta = ticks1 - ticks0;
>>-	uint32_t max_delta;
>>+	uint64_t clock_delta = oa_tick_delta(oa_report1, oa_report0, fmt);
>>+	uint64_t max_delta;
>>  	uint64_t freq;
>>  	uint32_t *rpt0_b = (uint32_t *)(((uint8_t *)oa_report0) +
>>  					format.b_off);
>>@@ -890,10 +920,10 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
>>  		  gen8_read_report_reason(oa_report0),
>>  		  gen8_read_report_reason(oa_report1));
>>-	freq = time_delta ? ((uint64_t)clock_delta * 1000) / time_delta : 0;
>>+	freq = time_delta ? (clock_delta * 1000) / time_delta : 0;
>>  	igt_debug("freq = %"PRIu64"\n", freq);
>>-	igt_debug("clock delta = %"PRIu32"\n", clock_delta);
>>+	igt_debug("clock delta = %"PRIu64"\n", clock_delta);
>>  	max_delta = clock_delta * intel_perf->devinfo.n_eus;
>>@@ -994,7 +1024,7 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
>>  					    format.c_off);
>>  		uint32_t delta = c1[j] - c0[j];
>>-		igt_debug("C%d: delta = %"PRIu32", max_delta=%"PRIu32"\n",
>>+		igt_debug("C%d: delta = %"PRIu32", max_delta=%"PRIu64"\n",
>>  			  j, delta, max_delta);
>>  		igt_assert(delta <= max_delta);
>>  	}
>>@@ -1440,10 +1470,10 @@ print_reports(uint32_t *oa_report0, uint32_t *oa_report1, int fmt)
>>  	if (IS_HASWELL(devid) && format.n_c == 0) {
>>  		igt_debug("CLOCK = N/A\n");
>>  	} else {
>>-		uint32_t clock0 = read_report_ticks(oa_report0, fmt);
>>-		uint32_t clock1 = read_report_ticks(oa_report1, fmt);
>>+		uint64_t clock0 = read_report_ticks(oa_report0, fmt);
>>+		uint64_t clock1 = read_report_ticks(oa_report1, fmt);
>>-		igt_debug("CLOCK: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n",
>>+		igt_debug("CLOCK: 1st = %"PRIu64", 2nd = %"PRIu64", delta = %"PRIu64"\n",
>>  			  clock0, clock1, clock1 - clock0);
>>  	}
>>@@ -1545,9 +1575,9 @@ print_report(uint32_t *report, int fmt)
>>  	if (IS_HASWELL(devid) && format.n_c == 0) {
>>  		igt_debug("CLOCK = N/A\n");
>>  	} else {
>>-		uint32_t clock = read_report_ticks(report, fmt);
>>+		uint64_t clock = read_report_ticks(report, fmt);
>>-		igt_debug("CLOCK: %"PRIu32"\n", clock);
>>+		igt_debug("CLOCK: %"PRIu64"\n", clock);
>>  	}
>>  	if (intel_gen(devid) >= 8) {
>
>

  reply	other threads:[~2022-09-06 18:56 UTC|newest]

Thread overview: 66+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-23 18:30 [igt-dev] [PATCH i-g-t 00/23] Add DG2 OA test Umesh Nerlige Ramappa
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 01/23] i915/perf: Check regularly if we are done reading reports Umesh Nerlige Ramappa
2022-09-06 12:49   ` Lionel Landwerlin
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 02/23] i915/perf: Fix OA short_reads test Umesh Nerlige Ramappa
2022-09-06 12:50   ` Lionel Landwerlin
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 03/23] i915/perf: Check return value from getparam Umesh Nerlige Ramappa
2022-09-06 12:52   ` Lionel Landwerlin
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 04/23] i915/perf: Limit sseu-config tests for gen11 Umesh Nerlige Ramappa
2022-09-06 12:53   ` Lionel Landwerlin
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 05/23] i915/perf: Bump timestamp tolerance for DG1 Umesh Nerlige Ramappa
2022-09-06 13:06   ` Lionel Landwerlin
2022-09-06 19:30     ` Umesh Nerlige Ramappa
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 06/23] i915/perf: Account for OA sampling interval in polling test Umesh Nerlige Ramappa
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 07/23] i915/perf: Add support for 64-bit counters Umesh Nerlige Ramappa
2022-09-06 13:05   ` Lionel Landwerlin
2022-09-06 19:37     ` Umesh Nerlige Ramappa
2022-09-06 20:02       ` Lionel Landwerlin
2022-09-06 20:58         ` Umesh Nerlige Ramappa
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 08/23] i915/perf: Define OA report types Umesh Nerlige Ramappa
2022-09-06 13:08   ` Lionel Landwerlin
2022-09-06 19:28     ` Umesh Nerlige Ramappa
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 09/23] i915/perf: Use ARRAY_SIZE consistently for num_properties Umesh Nerlige Ramappa
2022-09-06 13:10   ` Lionel Landwerlin
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 10/23] i915/perf: Use gt in perf tests and lib Umesh Nerlige Ramappa
2022-09-06 13:13   ` Lionel Landwerlin
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 11/23] i915/perf: Explicitly state rendercopy needs for a test Umesh Nerlige Ramappa
2022-09-06 13:26   ` Lionel Landwerlin
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 12/23] i915/perf: Skip tests that use rendercopy Umesh Nerlige Ramappa
2022-09-06 13:28   ` Lionel Landwerlin
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 13/23] i915/perf: Add OA formats for DG2 Umesh Nerlige Ramappa
2022-09-06 13:30   ` Lionel Landwerlin
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 14/23] i915/perf: Add a test for non-power-of-2 oa reports Umesh Nerlige Ramappa
2022-09-06 14:08   ` Lionel Landwerlin
2022-09-20 19:28     ` Umesh Nerlige Ramappa
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 15/23] i915/perf: Fix CS timestamp vs OA timstamp mismatch Umesh Nerlige Ramappa
2022-09-06 13:36   ` Lionel Landwerlin
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 16/23] i915/perf: Treat ticks as 64 bit Umesh Nerlige Ramappa
2022-09-06 14:08   ` Lionel Landwerlin
2022-09-06 18:56     ` Umesh Nerlige Ramappa [this message]
2022-09-06 19:06       ` Lionel Landwerlin
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 17/23] i915/perf: Treat timestamp as 64 bit value Umesh Nerlige Ramappa
2022-09-06 14:16   ` Lionel Landwerlin
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 18/23] i915/perf: Fix DG2 A0 report header Umesh Nerlige Ramappa
2022-09-06 14:17   ` Lionel Landwerlin
2022-09-06 18:37     ` Umesh Nerlige Ramappa
2022-09-06 18:54       ` Lionel Landwerlin
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 19/23] i915/perf: Wait longer for rc6 residency in DG2 Umesh Nerlige Ramappa
2022-09-06 14:18   ` Lionel Landwerlin
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 20/23] lib/i915/perf: implement report accumulation for new format Umesh Nerlige Ramappa
2022-09-06 14:21   ` Lionel Landwerlin
2022-09-20 20:02   ` Umesh Nerlige Ramappa
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 21/23] lib/i915/perf: fixup conversion script for XEHPSDV Umesh Nerlige Ramappa
2022-09-20 20:03   ` Umesh Nerlige Ramappa
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 22/23] lib/i915/perf: make warning message more helpful Umesh Nerlige Ramappa
2022-09-20 20:04   ` Umesh Nerlige Ramappa
2022-08-23 18:30 ` [igt-dev] [PATCH i-g-t 23/23] lib/i915/perf: Add DG2 metrics Umesh Nerlige Ramappa
2022-09-20 23:11   ` Umesh Nerlige Ramappa
2022-08-23 20:30 ` [igt-dev] [PATCH i-g-t 18/23] i915/perf: Fix DG2 A0 report header Umesh Nerlige Ramappa
2022-09-06 14:23   ` Lionel Landwerlin
2022-09-06 18:33     ` Umesh Nerlige Ramappa
2022-08-23 21:00 ` [igt-dev] ✗ Fi.CI.BUILD: failure for Add DG2 OA test Patchwork
2022-08-24  9:33 ` [igt-dev] ✗ Fi.CI.BUILD: failure for Add DG2 OA test (rev2) Patchwork
2022-08-24  9:58   ` Petri Latvala
2022-08-24 13:08 ` [igt-dev] ✓ Fi.CI.BAT: success for Add DG2 OA test (rev3) Patchwork
2022-08-25 20:33 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2022-08-22 23:56 [igt-dev] [PATCH i-g-t 00/23] Add DG2 OA test Umesh Nerlige Ramappa
2022-08-22 23:56 ` [igt-dev] [PATCH i-g-t 16/23] i915/perf: Treat ticks as 64 bit Umesh Nerlige Ramappa

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YxeXzC2JjhqApkPY@unerlige-ril \
    --to=umesh.nerlige.ramappa@intel.com \
    --cc=igt-dev@lists.freedesktop.org \
    --cc=lionel.g.landwerlin@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.