Igt-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Lucas De Marchi <lucas.demarchi@intel.com>
To: igt-dev@lists.freedesktop.org
Cc: Lucas De Marchi <lucas.demarchi@intel.com>,
	Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Subject: [PATCH i-g-t 8/8] tests/intel/xe_drm_fdinfo: Stop asserting on usage percentage
Date: Fri,  3 Jan 2025 23:15:48 -0800	[thread overview]
Message-ID: <20250104071548.737612-8-lucas.demarchi@intel.com> (raw)
In-Reply-To: <20250104071548.737612-1-lucas.demarchi@intel.com>

It's unreliable to assert on the usage percentage considering 2 data
points as it still depends on the CPU scheduling not preempting tasks at
the wrong moment. On a normal use case of a top-like application, the
value not accounted for would simply show up in the next sample without
much issue. For a test assertion, it's better to check that the value
reported via fdinfo is reasonably close to the one saved by the GPU in
the spin. Some error is still allowed because there are a few GPU
ticks of difference due to the **GPU** scheduling the contexts.

Cc: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
 tests/intel/xe_drm_fdinfo.c | 49 +++++++++++++++++++++++--------------
 1 file changed, 31 insertions(+), 18 deletions(-)

diff --git a/tests/intel/xe_drm_fdinfo.c b/tests/intel/xe_drm_fdinfo.c
index 1089e5119..120436fbe 100644
--- a/tests/intel/xe_drm_fdinfo.c
+++ b/tests/intel/xe_drm_fdinfo.c
@@ -3,6 +3,8 @@
  * Copyright © 2023 Intel Corporation
  */
 
+#include <math.h>
+
 #include "igt.h"
 #include "igt_core.h"
 #include "igt_device.h"
@@ -371,7 +373,8 @@ static void basic_engine_utilization(int xe)
 
 static void
 check_results(struct pceu_cycles *s1, struct pceu_cycles *s2,
-	      int class, int width, enum expected_load expected_load)
+	      int class, int width, uint32_t spin_stamp,
+	      enum expected_load expected_load)
 {
 	double percent;
 	u64 den, num;
@@ -383,12 +386,9 @@ check_results(struct pceu_cycles *s1, struct pceu_cycles *s2,
 
 	num = s2[class].cycles - s1[class].cycles;
 	den = s2[class].total_cycles - s1[class].total_cycles;
-	percent = (num * 100.0) / (den + 1);
-
-	/* for parallel submission scale the busyness with width */
-	percent /= width;
 
-	igt_debug("%s: percent: %f\n", engine_map[class], percent);
+	percent = (num * 100.0) / (den + 1) / width;
+	igt_debug("%s: percent: %.2f%%\n", engine_map[class], percent);
 
 	switch (expected_load) {
 	case EXPECTED_LOAD_IDLE:
@@ -396,11 +396,12 @@ check_results(struct pceu_cycles *s1, struct pceu_cycles *s2,
 		break;
 	case EXPECTED_LOAD_FULL:
 		/*
-		 * We are still relying on CPU sleep time and there could be
-		 * some imprecision when calculating the load. Use a 5% margin.
+		 * percentage error between value saved by gpu in xe_spin and what
+		 * is reported via fdinfo
 		 */
-		igt_assert_lt_double(95.0, percent);
-		igt_assert_lt_double(percent, 105.0);
+		percent = fabs((num - spin_stamp) * 100.0) / (spin_stamp + 1);
+		igt_debug("%s: error: %.2f%%\n", engine_map[class], percent);
+		igt_assert_lt_double(percent, 5.0);
 		break;
 	}
 }
@@ -438,14 +439,17 @@ utilization_single(int fd, struct drm_xe_engine_class_instance *hwe, unsigned in
 
 	expected_load = flags & TEST_BUSY ?
 	       EXPECTED_LOAD_FULL : EXPECTED_LOAD_IDLE;
-	check_results(pceu1[0], pceu2[0], hwe->engine_class, 1, expected_load);
+
+	check_results(pceu1[0], pceu2[0], hwe->engine_class, 1,
+		      cork ? cork->spin->timestamp : 0, expected_load);
 
 	if (flags & TEST_ISOLATION) {
 		/*
 		 * Load from one client shouldn't spill on another,
 		 * so check for idle
 		 */
-		check_results(pceu1[1], pceu2[1], hwe->engine_class, 1, EXPECTED_LOAD_IDLE);
+		check_results(pceu1[1], pceu2[1], hwe->engine_class, 1, 0,
+			      EXPECTED_LOAD_IDLE);
 		close(new_fd);
 	}
 
@@ -461,6 +465,7 @@ utilization_single_destroy_queue(int fd, struct drm_xe_engine_class_instance *hw
 	struct pceu_cycles pceu1[DRM_XE_ENGINE_CLASS_COMPUTE + 1];
 	struct pceu_cycles pceu2[DRM_XE_ENGINE_CLASS_COMPUTE + 1];
 	struct xe_cork *cork;
+	uint32_t timestamp;
 	uint32_t vm;
 
 	vm = xe_vm_create(fd, 0, 0);
@@ -472,13 +477,15 @@ utilization_single_destroy_queue(int fd, struct drm_xe_engine_class_instance *hw
 
 	/* destroy queue before sampling again */
 	xe_cork_sync_end(fd, cork);
+	timestamp = cork->spin->timestamp;
 	xe_cork_destroy(fd, cork);
 
 	read_engine_cycles(fd, pceu2);
 
 	xe_vm_destroy(fd, vm);
 
-	check_results(pceu1, pceu2, hwe->engine_class, 1, EXPECTED_LOAD_FULL);
+	check_results(pceu1, pceu2, hwe->engine_class, 1, timestamp,
+		      EXPECTED_LOAD_FULL);
 }
 
 static void
@@ -503,7 +510,8 @@ utilization_others_idle(int fd, struct drm_xe_engine_class_instance *hwe)
 		enum expected_load expected_load = hwe->engine_class != class ?
 			EXPECTED_LOAD_IDLE : EXPECTED_LOAD_FULL;
 
-		check_results(pceu1, pceu2, class, 1, expected_load);
+		check_results(pceu1, pceu2, class, 1, cork->spin->timestamp,
+			      expected_load);
 	}
 
 	xe_cork_destroy(fd, cork);
@@ -547,7 +555,8 @@ utilization_others_full_load(int fd, struct drm_xe_engine_class_instance *hwe)
 		if (!cork[class])
 			continue;
 
-		check_results(pceu1, pceu2, class, 1, expected_load);
+		check_results(pceu1, pceu2, class, 1, cork[class]->spin->timestamp,
+			      expected_load);
 		xe_cork_destroy(fd, cork[class]);
 	}
 
@@ -585,7 +594,9 @@ utilization_all_full_load(int fd)
 		if (!cork[class])
 			continue;
 
-		check_results(pceu1, pceu2, class, 1, EXPECTED_LOAD_FULL);
+		check_results(pceu1, pceu2, class, 1,
+			      cork[class]->spin->timestamp,
+			      EXPECTED_LOAD_FULL);
 		xe_cork_destroy(fd, cork[class]);
 	}
 
@@ -657,14 +668,16 @@ utilization_multi(int fd, int gt, int class, unsigned int flags)
 
 	expected_load = flags & TEST_BUSY ?
 	       EXPECTED_LOAD_FULL : EXPECTED_LOAD_IDLE;
-	check_results(pceu[0], pceu[1], class, width, expected_load);
+
+	check_results(pceu[0], pceu[1], class, width,
+		      cork ? cork->spin->timestamp : 0, expected_load);
 
 	if (flags & TEST_ISOLATION) {
 		/*
 		 * Load from one client shouldn't spill on another,
 		 * so check for idle
 		 */
-		check_results(pceu_spill[0], pceu_spill[1], class, width,
+		check_results(pceu_spill[0], pceu_spill[1], class, width, 0,
 			      EXPECTED_LOAD_IDLE);
 		close(fd_spill);
 	}
-- 
2.47.0


  parent reply	other threads:[~2025-01-04  7:16 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-04  7:15 [PATCH i-g-t 1/8] tests/intel/xe_drm_fdinfo: Do not end cork not started Lucas De Marchi
2025-01-04  7:15 ` [PATCH i-g-t 2/8] lib/xe/xe_gt: Fix header guards and boilerplate Lucas De Marchi
2025-01-06 22:58   ` Cavitt, Jonathan
2025-01-04  7:15 ` [PATCH i-g-t 3/8] lib/xe: Move functions from xe_util to xe_gt Lucas De Marchi
2025-01-06 22:58   ` Cavitt, Jonathan
2025-01-07 17:57     ` Lucas De Marchi
2025-01-04  7:15 ` [PATCH i-g-t 4/8] lib/xe: Rename xe_is_gt_in_c6() Lucas De Marchi
2025-01-06 22:58   ` Cavitt, Jonathan
2025-01-04  7:15 ` [PATCH i-g-t 5/8] lib/xe: Split nsec to ticks abstraction Lucas De Marchi
2025-01-06 22:58   ` Cavitt, Jonathan
2025-01-04  7:15 ` [PATCH i-g-t 6/8] lib/xe/xe_spin: Move declarations around Lucas De Marchi
2025-01-06 22:58   ` Cavitt, Jonathan
2025-01-07 18:05     ` Lucas De Marchi
2025-01-04  7:15 ` [PATCH i-g-t 7/8] treewide: s/ctx/cork/ when referring to xe_cork Lucas De Marchi
2025-01-06 22:58   ` Cavitt, Jonathan
2025-01-04  7:15 ` Lucas De Marchi [this message]
2025-01-06 22:58   ` [PATCH i-g-t 8/8] tests/intel/xe_drm_fdinfo: Stop asserting on usage percentage Cavitt, Jonathan
2025-01-07 19:06     ` Lucas De Marchi
2025-01-06 14:58 ` ✓ i915.CI.BAT: success for series starting with [i-g-t,1/8] tests/intel/xe_drm_fdinfo: Do not end cork not started (rev2) Patchwork
2025-01-06 15:05 ` ✓ Xe.CI.BAT: " Patchwork
2025-01-06 15:08 ` [PATCH i-g-t 1/8] tests/intel/xe_drm_fdinfo: Do not end cork not started Cavitt, Jonathan
2025-01-06 17:22   ` Lucas De Marchi
2025-01-06 17:03 ` ✓ i915.CI.Full: success for series starting with [i-g-t,1/8] tests/intel/xe_drm_fdinfo: Do not end cork not started (rev2) Patchwork
2025-01-07  6:07 ` ✗ Xe.CI.Full: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250104071548.737612-8-lucas.demarchi@intel.com \
    --to=lucas.demarchi@intel.com \
    --cc=igt-dev@lists.freedesktop.org \
    --cc=umesh.nerlige.ramappa@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox