public inbox for igt-dev@lists.freedesktop.org
 help / color / mirror / Atom feed
* [igt-dev] [PATCH i-g-t 1/2] i915/gem_eio: Check average reset times
@ 2019-02-12 20:57 Chris Wilson
  2019-02-12 20:57 ` [igt-dev] [PATCH i-g-t 2/2] i915/gem_eio: Allow older platforms more leniency in reset latency Chris Wilson
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Chris Wilson @ 2019-02-12 20:57 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

As we have moved to rcu/srcu to serialise the resets, individual resets
are subject to small variations in system grace periods. Allow for this
by only expecting the median reset time to be within our target, thereby
excluding noisy outliers from perturbing our results (but keep the
maximum capped to prevent horrid failures!)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/i915/gem_eio.c | 53 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 41 insertions(+), 12 deletions(-)

diff --git a/tests/i915/gem_eio.c b/tests/i915/gem_eio.c
index 61054a07e..bd5266104 100644
--- a/tests/i915/gem_eio.c
+++ b/tests/i915/gem_eio.c
@@ -42,6 +42,7 @@
 
 #include "igt.h"
 #include "igt_device.h"
+#include "igt_stats.h"
 #include "igt_sysfs.h"
 #include "sw_sync.h"
 #include "i915/gem_ring.h"
@@ -239,10 +240,9 @@ static void hang_after(int fd, unsigned int us, struct timespec *ts)
 	igt_assert_eq(timer_settime(ctx->timer, 0, &its, NULL), 0);
 }
 
-static void check_wait(int fd, uint32_t bo, unsigned int wait)
+static void check_wait(int fd, uint32_t bo, unsigned int wait, igt_stats_t *st)
 {
 	struct timespec ts = {};
-	uint64_t elapsed;
 
 	if (wait) {
 		hang_after(fd, wait, &ts);
@@ -253,10 +253,34 @@ static void check_wait(int fd, uint32_t bo, unsigned int wait)
 
 	gem_sync(fd, bo);
 
-	elapsed = igt_nsec_elapsed(&ts);
-	igt_assert_f(elapsed < 250e6,
-		     "Wake up following reset+wedge took %.3fms\n",
-		     elapsed*1e-6);
+	if (st)
+		igt_stats_push(st, igt_nsec_elapsed(&ts));
+}
+
+static void check_wait_elapsed(int fd, igt_stats_t *st)
+{
+	double med, max;
+
+	igt_info("Completed %d resets, wakeups took %.3f+-%.3fms (min:%.3fms, median:%.3fms, max:%.3fms)\n",
+		 st->n_values,
+		 igt_stats_get_mean(st)*1e-6,
+		 igt_stats_get_std_deviation(st)*1e-6,
+		 igt_stats_get_min(st)*1e-6,
+		 igt_stats_get_median(st)*1e-6,
+		 igt_stats_get_max(st)*1e-6);
+
+	if (st->n_values < 9)
+		return; /* too few for stable median */
+
+	med = igt_stats_get_median(st);
+	max = igt_stats_get_max(st);
+	igt_assert_f(med < 250e6 && max < 1250e6,
+		     "Wake up following reset+wedge took %.3f+-%.3fms (min:%.3fms, median:%.3fms, max:%.3fms)\n",
+		     igt_stats_get_mean(st)*1e-6,
+		     igt_stats_get_std_deviation(st)*1e-6,
+		     igt_stats_get_min(st)*1e-6,
+		     igt_stats_get_median(st)*1e-6,
+		     igt_stats_get_max(st)*1e-6);
 }
 
 static void __test_banned(int fd)
@@ -326,7 +350,7 @@ static void test_wait(int fd, unsigned int flags, unsigned int wait)
 
 	hang = spin_sync(fd, 0, I915_EXEC_DEFAULT);
 
-	check_wait(fd, hang->handle, wait);
+	check_wait(fd, hang->handle, wait, NULL);
 
 	igt_spin_batch_free(fd, hang);
 
@@ -401,7 +425,7 @@ static void test_inflight(int fd, unsigned int wait)
 			igt_assert(fence[n] != -1);
 		}
 
-		check_wait(fd, obj[1].handle, wait);
+		check_wait(fd, obj[1].handle, wait, NULL);
 
 		for (unsigned int n = 0; n < max; n++) {
 			igt_assert_eq(sync_fence_status(fence[n]), -EIO);
@@ -457,7 +481,7 @@ static void test_inflight_suspend(int fd)
 	igt_set_autoresume_delay(30);
 	igt_system_suspend_autoresume(SUSPEND_STATE_MEM, SUSPEND_TEST_NONE);
 
-	check_wait(fd, obj[1].handle, 10);
+	check_wait(fd, obj[1].handle, 10, NULL);
 
 	for (unsigned int n = 0; n < max; n++) {
 		igt_assert_eq(sync_fence_status(fence[n]), -EIO);
@@ -535,7 +559,7 @@ static void test_inflight_contexts(int fd, unsigned int wait)
 			igt_assert(fence[n] != -1);
 		}
 
-		check_wait(fd, obj[1].handle, wait);
+		check_wait(fd, obj[1].handle, wait, NULL);
 
 		for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) {
 			igt_assert_eq(sync_fence_status(fence[n]), -EIO);
@@ -644,7 +668,7 @@ static void test_inflight_internal(int fd, unsigned int wait)
 		nfence++;
 	}
 
-	check_wait(fd, obj[1].handle, wait);
+	check_wait(fd, obj[1].handle, wait, NULL);
 
 	while (nfence--) {
 		igt_assert_eq(sync_fence_status(fences[nfence]), -EIO);
@@ -670,8 +694,11 @@ static void reset_stress(int fd,
 		.buffer_count = 1,
 		.flags = engine,
 	};
+	igt_stats_t stats;
+
 	gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
 
+	igt_stats_init(&stats);
 	igt_until_timeout(5) {
 		uint32_t ctx = context_create_safe(fd);
 		igt_spin_t *hang;
@@ -697,7 +724,7 @@ static void reset_stress(int fd,
 			gem_execbuf(fd, &execbuf);
 
 		/* Wedge after a small delay. */
-		check_wait(fd, obj.handle, 100e3);
+		check_wait(fd, obj.handle, 100e3, &stats);
 
 		/* Unwedge by forcing a reset. */
 		igt_assert(i915_reset_control(true));
@@ -721,6 +748,8 @@ static void reset_stress(int fd,
 		igt_spin_batch_free(fd, hang);
 		gem_context_destroy(fd, ctx);
 	}
+	check_wait_elapsed(fd, &stats);
+	igt_stats_fini(&stats);
 
 	gem_close(fd, obj.handle);
 }
-- 
2.20.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [igt-dev] [PATCH i-g-t 2/2] i915/gem_eio: Allow older platforms more leniency in reset latency
  2019-02-12 20:57 [igt-dev] [PATCH i-g-t 1/2] i915/gem_eio: Check average reset times Chris Wilson
@ 2019-02-12 20:57 ` Chris Wilson
  2019-02-15 14:43   ` [Intel-gfx] " Mika Kuoppala
  2019-02-12 21:22 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2] i915/gem_eio: Check average reset times Patchwork
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 8+ messages in thread
From: Chris Wilson @ 2019-02-12 20:57 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

Older platforms need to clobber the display around a reset (incl. a
modeset to off, and a modeset back on), which can be much slower than
the reset itself. Give these platforms (gen2-4) some leniency and allow
them a higher limit before declaring them a failure.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/i915/gem_eio.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/tests/i915/gem_eio.c b/tests/i915/gem_eio.c
index bd5266104..ac85a2eff 100644
--- a/tests/i915/gem_eio.c
+++ b/tests/i915/gem_eio.c
@@ -259,7 +259,7 @@ static void check_wait(int fd, uint32_t bo, unsigned int wait, igt_stats_t *st)
 
 static void check_wait_elapsed(int fd, igt_stats_t *st)
 {
-	double med, max;
+	double med, max, limit;
 
 	igt_info("Completed %d resets, wakeups took %.3f+-%.3fms (min:%.3fms, median:%.3fms, max:%.3fms)\n",
 		 st->n_values,
@@ -272,15 +272,24 @@ static void check_wait_elapsed(int fd, igt_stats_t *st)
 	if (st->n_values < 9)
 		return; /* too few for stable median */
 
+	/*
+	 * Older platforms need to reset the display (incl. modeset to off,
+	 * modeset back on) around resets, so may take a lot longer.
+	 */
+	limit = 250e6;
+	if (intel_gen(intel_get_drm_devid(fd)) < 5)
+		limit += 300e6; /* guestimate for 2x worstcase modeset */
+
 	med = igt_stats_get_median(st);
 	max = igt_stats_get_max(st);
-	igt_assert_f(med < 250e6 && max < 1250e6,
-		     "Wake up following reset+wedge took %.3f+-%.3fms (min:%.3fms, median:%.3fms, max:%.3fms)\n",
+	igt_assert_f(med < limit && max < 5 * limit,
+		     "Wake up following reset+wedge took %.3f+-%.3fms (min:%.3fms, median:%.3fms, max:%.3fms); limit set to %.0fms on average and %.0fms maximum\n",
 		     igt_stats_get_mean(st)*1e-6,
 		     igt_stats_get_std_deviation(st)*1e-6,
 		     igt_stats_get_min(st)*1e-6,
 		     igt_stats_get_median(st)*1e-6,
-		     igt_stats_get_max(st)*1e-6);
+		     igt_stats_get_max(st)*1e-6,
+		     limit*1e-6, limit*5e-6);
 }
 
 static void __test_banned(int fd)
-- 
2.20.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2] i915/gem_eio: Check average reset times
  2019-02-12 20:57 [igt-dev] [PATCH i-g-t 1/2] i915/gem_eio: Check average reset times Chris Wilson
  2019-02-12 20:57 ` [igt-dev] [PATCH i-g-t 2/2] i915/gem_eio: Allow older platforms more leniency in reset latency Chris Wilson
@ 2019-02-12 21:22 ` Patchwork
  2019-02-12 23:12 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
  2019-02-15 14:40 ` [Intel-gfx] [PATCH i-g-t 1/2] " Mika Kuoppala
  3 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2019-02-12 21:22 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: series starting with [i-g-t,1/2] i915/gem_eio: Check average reset times
URL   : https://patchwork.freedesktop.org/series/56576/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5595 -> IGTPW_2387
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/56576/revisions/1/mbox/

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in IGTPW_2387:

### IGT changes ###

#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@i915_selftest@live_objects:
    - {fi-icl-y}:         PASS -> INCOMPLETE

  * igt@i915_selftest@live_workarounds:
    - {fi-icl-u2}:        PASS -> INCOMPLETE

  
Known issues
------------

  Here are the changes found in IGTPW_2387 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@amdgpu/amd_basic@cs-compute:
    - fi-kbl-8809g:       NOTRUN -> FAIL [fdo#108094]

  * igt@kms_pipe_crc_basic@nonblocking-crc-pipe-a-frame-sequence:
    - fi-byt-clapper:     PASS -> FAIL [fdo#103191] / [fdo#107362] +2

  * igt@kms_pipe_crc_basic@read-crc-pipe-b:
    - fi-byt-clapper:     PASS -> FAIL [fdo#107362] +1

  
#### Possible fixes ####

  * igt@amdgpu/amd_basic@userptr:
    - fi-kbl-8809g:       DMESG-WARN [fdo#108965] -> PASS

  * igt@i915_selftest@live_workarounds:
    - {fi-icl-u3}:        INCOMPLETE -> PASS

  * igt@kms_pipe_crc_basic@hang-read-crc-pipe-b:
    - fi-byt-clapper:     FAIL [fdo#103191] / [fdo#107362] -> PASS

  * igt@prime_vgem@basic-fence-flip:
    - fi-gdg-551:         DMESG-FAIL [fdo#103182] -> PASS

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103182]: https://bugs.freedesktop.org/show_bug.cgi?id=103182
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#107362]: https://bugs.freedesktop.org/show_bug.cgi?id=107362
  [fdo#108094]: https://bugs.freedesktop.org/show_bug.cgi?id=108094
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#108965]: https://bugs.freedesktop.org/show_bug.cgi?id=108965


Participating hosts (44 -> 39)
------------------------------

  Missing    (5): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-bdw-samus 


Build changes
-------------

    * IGT: IGT_4819 -> IGTPW_2387

  CI_DRM_5595: 159160c4276fdbf738e5c19549f07e0c7f0f27a8 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_2387: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2387/
  IGT_4819: 845c9fb45b734aef95e2fb2317d0c02567e06a68 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2387/
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [igt-dev] ✓ Fi.CI.IGT: success for series starting with [i-g-t,1/2] i915/gem_eio: Check average reset times
  2019-02-12 20:57 [igt-dev] [PATCH i-g-t 1/2] i915/gem_eio: Check average reset times Chris Wilson
  2019-02-12 20:57 ` [igt-dev] [PATCH i-g-t 2/2] i915/gem_eio: Allow older platforms more leniency in reset latency Chris Wilson
  2019-02-12 21:22 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2] i915/gem_eio: Check average reset times Patchwork
@ 2019-02-12 23:12 ` Patchwork
  2019-02-15 14:40 ` [Intel-gfx] [PATCH i-g-t 1/2] " Mika Kuoppala
  3 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2019-02-12 23:12 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: series starting with [i-g-t,1/2] i915/gem_eio: Check average reset times
URL   : https://patchwork.freedesktop.org/series/56576/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5595_full -> IGTPW_2387_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/56576/revisions/1/mbox/

Known issues
------------

  Here are the changes found in IGTPW_2387_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_gttfill@basic:
    - shard-snb:          PASS -> INCOMPLETE [fdo#105411]

  * igt@kms_busy@extended-modeset-hang-newfb-with-reset-render-a:
    - shard-glk:          NOTRUN -> DMESG-WARN [fdo#107956]

  * igt@kms_color@pipe-a-ctm-max:
    - shard-apl:          PASS -> FAIL [fdo#108147]

  * igt@kms_color@pipe-a-legacy-gamma:
    - shard-apl:          PASS -> FAIL [fdo#104782] / [fdo#108145]

  * igt@kms_color@pipe-b-legacy-gamma:
    - shard-apl:          PASS -> FAIL [fdo#104782] +1

  * igt@kms_cursor_crc@cursor-256x256-suspend:
    - shard-apl:          PASS -> FAIL [fdo#103191] / [fdo#103232]

  * igt@kms_cursor_crc@cursor-256x85-random:
    - shard-apl:          PASS -> FAIL [fdo#103232] +4

  * igt@kms_cursor_legacy@2x-nonblocking-modeset-vs-cursor-atomic:
    - shard-glk:          NOTRUN -> FAIL [fdo#105454] / [fdo#106509]

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-render:
    - shard-apl:          PASS -> FAIL [fdo#103167]

  * igt@kms_frontbuffer_tracking@fbc-1p-rte:
    - shard-apl:          PASS -> FAIL [fdo#103167] / [fdo#105682]

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-draw-mmap-wc:
    - shard-glk:          PASS -> FAIL [fdo#103167] +13

  * igt@kms_frontbuffer_tracking@fbc-2p-rte:
    - shard-glk:          PASS -> FAIL [fdo#103167] / [fdo#105682]

  * igt@kms_plane@pixel-format-pipe-b-planes-source-clamping:
    - shard-apl:          PASS -> FAIL [fdo#108948]

  * igt@kms_plane@plane-position-covered-pipe-b-planes:
    - shard-apl:          PASS -> FAIL [fdo#103166]

  * igt@kms_plane_alpha_blend@pipe-a-constant-alpha-max:
    - shard-glk:          PASS -> FAIL [fdo#108145] +2

  * igt@kms_plane_multiple@atomic-pipe-c-tiling-y:
    - shard-glk:          PASS -> FAIL [fdo#103166] +2

  
#### Possible fixes ####

  * igt@gem_eio@unwedge-stress:
    - shard-snb:          FAIL [fdo#107799] -> PASS

  * igt@kms_cursor_crc@cursor-256x85-onscreen:
    - shard-apl:          FAIL [fdo#103232] -> PASS +4

  * igt@kms_cursor_crc@cursor-size-change:
    - shard-glk:          FAIL [fdo#103232] -> PASS

  * igt@kms_flip@flip-vs-expired-vblank:
    - shard-glk:          FAIL [fdo#105363] -> PASS

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-fullscreen:
    - shard-apl:          FAIL [fdo#103167] -> PASS +2

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-draw-mmap-cpu:
    - shard-glk:          FAIL [fdo#103167] -> PASS

  * igt@kms_plane@pixel-format-pipe-a-planes-source-clamping:
    - shard-glk:          FAIL [fdo#108948] -> PASS
    - shard-apl:          FAIL [fdo#108948] -> PASS

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes:
    - shard-glk:          INCOMPLETE [fdo#103359] / [k.org#198133] -> PASS

  * igt@kms_plane_alpha_blend@pipe-b-constant-alpha-max:
    - shard-glk:          FAIL [fdo#108145] -> PASS

  * igt@kms_plane_multiple@atomic-pipe-b-tiling-yf:
    - shard-apl:          FAIL [fdo#103166] -> PASS +1

  * igt@kms_universal_plane@universal-plane-pipe-b-functional:
    - shard-glk:          FAIL [fdo#103166] -> PASS +2

  * igt@perf_pmu@rc6-runtime-pm-long:
    - shard-apl:          FAIL [fdo#105010] -> PASS
    - shard-glk:          FAIL [fdo#105010] -> PASS

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103359]: https://bugs.freedesktop.org/show_bug.cgi?id=103359
  [fdo#103540]: https://bugs.freedesktop.org/show_bug.cgi?id=103540
  [fdo#104782]: https://bugs.freedesktop.org/show_bug.cgi?id=104782
  [fdo#105010]: https://bugs.freedesktop.org/show_bug.cgi?id=105010
  [fdo#105363]: https://bugs.freedesktop.org/show_bug.cgi?id=105363
  [fdo#105411]: https://bugs.freedesktop.org/show_bug.cgi?id=105411
  [fdo#105454]: https://bugs.freedesktop.org/show_bug.cgi?id=105454
  [fdo#105682]: https://bugs.freedesktop.org/show_bug.cgi?id=105682
  [fdo#106509]: https://bugs.freedesktop.org/show_bug.cgi?id=106509
  [fdo#107799]: https://bugs.freedesktop.org/show_bug.cgi?id=107799
  [fdo#107956]: https://bugs.freedesktop.org/show_bug.cgi?id=107956
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108147]: https://bugs.freedesktop.org/show_bug.cgi?id=108147
  [fdo#108948]: https://bugs.freedesktop.org/show_bug.cgi?id=108948
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [k.org#198133]: https://bugzilla.kernel.org/show_bug.cgi?id=198133


Participating hosts (6 -> 4)
------------------------------

  Missing    (2): shard-skl shard-iclb 


Build changes
-------------

    * IGT: IGT_4819 -> IGTPW_2387
    * Piglit: piglit_4509 -> None

  CI_DRM_5595: 159160c4276fdbf738e5c19549f07e0c7f0f27a8 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_2387: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2387/
  IGT_4819: 845c9fb45b734aef95e2fb2317d0c02567e06a68 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2387/
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Intel-gfx] [PATCH i-g-t 1/2] i915/gem_eio: Check average reset times
  2019-02-12 20:57 [igt-dev] [PATCH i-g-t 1/2] i915/gem_eio: Check average reset times Chris Wilson
                   ` (2 preceding siblings ...)
  2019-02-12 23:12 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
@ 2019-02-15 14:40 ` Mika Kuoppala
  2019-02-15 14:52   ` [igt-dev] " Chris Wilson
  3 siblings, 1 reply; 8+ messages in thread
From: Mika Kuoppala @ 2019-02-15 14:40 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev

Chris Wilson <chris@chris-wilson.co.uk> writes:

> As we have moved to rcu/srcu to serialise the resets, individual resets
> are subject to small variations in system grace periods. Allow for this
> by only expecting the median reset time to be within our target, thereby
> excluding noisy outliers from perturbing our results (but keep the
> maximum capped to prevent horrid failures!)
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  tests/i915/gem_eio.c | 53 ++++++++++++++++++++++++++++++++++----------
>  1 file changed, 41 insertions(+), 12 deletions(-)
>
> diff --git a/tests/i915/gem_eio.c b/tests/i915/gem_eio.c
> index 61054a07e..bd5266104 100644
> --- a/tests/i915/gem_eio.c
> +++ b/tests/i915/gem_eio.c
> @@ -42,6 +42,7 @@
>  
>  #include "igt.h"
>  #include "igt_device.h"
> +#include "igt_stats.h"
>  #include "igt_sysfs.h"
>  #include "sw_sync.h"
>  #include "i915/gem_ring.h"
> @@ -239,10 +240,9 @@ static void hang_after(int fd, unsigned int us, struct timespec *ts)
>  	igt_assert_eq(timer_settime(ctx->timer, 0, &its, NULL), 0);
>  }
>  
> -static void check_wait(int fd, uint32_t bo, unsigned int wait)
> +static void check_wait(int fd, uint32_t bo, unsigned int wait, igt_stats_t *st)
>  {
>  	struct timespec ts = {};
> -	uint64_t elapsed;
>  
>  	if (wait) {
>  		hang_after(fd, wait, &ts);
> @@ -253,10 +253,34 @@ static void check_wait(int fd, uint32_t bo, unsigned int wait)
>  
>  	gem_sync(fd, bo);
>  
> -	elapsed = igt_nsec_elapsed(&ts);
> -	igt_assert_f(elapsed < 250e6,
> -		     "Wake up following reset+wedge took %.3fms\n",
> -		     elapsed*1e-6);
> +	if (st)
> +		igt_stats_push(st, igt_nsec_elapsed(&ts));

You prolly want igt_nsec_elapsed() primed regardless
of wait, otherwise I see -1 pushed polluting the stats.

-Mika


> +}
> +
> +static void check_wait_elapsed(int fd, igt_stats_t *st)
> +{
> +	double med, max;
> +
> +	igt_info("Completed %d resets, wakeups took %.3f+-%.3fms (min:%.3fms, median:%.3fms, max:%.3fms)\n",
> +		 st->n_values,
> +		 igt_stats_get_mean(st)*1e-6,
> +		 igt_stats_get_std_deviation(st)*1e-6,
> +		 igt_stats_get_min(st)*1e-6,
> +		 igt_stats_get_median(st)*1e-6,
> +		 igt_stats_get_max(st)*1e-6);
> +
> +	if (st->n_values < 9)
> +		return; /* too few for stable median */
> +
> +	med = igt_stats_get_median(st);
> +	max = igt_stats_get_max(st);
> +	igt_assert_f(med < 250e6 && max < 1250e6,
> +		     "Wake up following reset+wedge took %.3f+-%.3fms (min:%.3fms, median:%.3fms, max:%.3fms)\n",
> +		     igt_stats_get_mean(st)*1e-6,
> +		     igt_stats_get_std_deviation(st)*1e-6,
> +		     igt_stats_get_min(st)*1e-6,
> +		     igt_stats_get_median(st)*1e-6,
> +		     igt_stats_get_max(st)*1e-6);
>  }
>  
>  static void __test_banned(int fd)
> @@ -326,7 +350,7 @@ static void test_wait(int fd, unsigned int flags, unsigned int wait)
>  
>  	hang = spin_sync(fd, 0, I915_EXEC_DEFAULT);
>  
> -	check_wait(fd, hang->handle, wait);
> +	check_wait(fd, hang->handle, wait, NULL);
>  
>  	igt_spin_batch_free(fd, hang);
>  
> @@ -401,7 +425,7 @@ static void test_inflight(int fd, unsigned int wait)
>  			igt_assert(fence[n] != -1);
>  		}
>  
> -		check_wait(fd, obj[1].handle, wait);
> +		check_wait(fd, obj[1].handle, wait, NULL);
>  
>  		for (unsigned int n = 0; n < max; n++) {
>  			igt_assert_eq(sync_fence_status(fence[n]), -EIO);
> @@ -457,7 +481,7 @@ static void test_inflight_suspend(int fd)
>  	igt_set_autoresume_delay(30);
>  	igt_system_suspend_autoresume(SUSPEND_STATE_MEM, SUSPEND_TEST_NONE);
>  
> -	check_wait(fd, obj[1].handle, 10);
> +	check_wait(fd, obj[1].handle, 10, NULL);
>  
>  	for (unsigned int n = 0; n < max; n++) {
>  		igt_assert_eq(sync_fence_status(fence[n]), -EIO);
> @@ -535,7 +559,7 @@ static void test_inflight_contexts(int fd, unsigned int wait)
>  			igt_assert(fence[n] != -1);
>  		}
>  
> -		check_wait(fd, obj[1].handle, wait);
> +		check_wait(fd, obj[1].handle, wait, NULL);
>  
>  		for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) {
>  			igt_assert_eq(sync_fence_status(fence[n]), -EIO);
> @@ -644,7 +668,7 @@ static void test_inflight_internal(int fd, unsigned int wait)
>  		nfence++;
>  	}
>  
> -	check_wait(fd, obj[1].handle, wait);
> +	check_wait(fd, obj[1].handle, wait, NULL);
>  
>  	while (nfence--) {
>  		igt_assert_eq(sync_fence_status(fences[nfence]), -EIO);
> @@ -670,8 +694,11 @@ static void reset_stress(int fd,
>  		.buffer_count = 1,
>  		.flags = engine,
>  	};
> +	igt_stats_t stats;
> +
>  	gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
>  
> +	igt_stats_init(&stats);
>  	igt_until_timeout(5) {
>  		uint32_t ctx = context_create_safe(fd);
>  		igt_spin_t *hang;
> @@ -697,7 +724,7 @@ static void reset_stress(int fd,
>  			gem_execbuf(fd, &execbuf);
>  
>  		/* Wedge after a small delay. */
> -		check_wait(fd, obj.handle, 100e3);
> +		check_wait(fd, obj.handle, 100e3, &stats);
>  
>  		/* Unwedge by forcing a reset. */
>  		igt_assert(i915_reset_control(true));
> @@ -721,6 +748,8 @@ static void reset_stress(int fd,
>  		igt_spin_batch_free(fd, hang);
>  		gem_context_destroy(fd, ctx);
>  	}
> +	check_wait_elapsed(fd, &stats);
> +	igt_stats_fini(&stats);
>  
>  	gem_close(fd, obj.handle);
>  }
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Intel-gfx] [PATCH i-g-t 2/2] i915/gem_eio: Allow older platforms more leniency in reset latency
  2019-02-12 20:57 ` [igt-dev] [PATCH i-g-t 2/2] i915/gem_eio: Allow older platforms more leniency in reset latency Chris Wilson
@ 2019-02-15 14:43   ` Mika Kuoppala
  0 siblings, 0 replies; 8+ messages in thread
From: Mika Kuoppala @ 2019-02-15 14:43 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Older platforms need to clobber the display around a reset (incl. a
> modeset to off, and a modeset back on), which can be much slower than
> the reset itself. Give these platforms (gen2-4) some leniency and allow
> them a higher limit before declaring them a failure.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  tests/i915/gem_eio.c | 17 +++++++++++++----
>  1 file changed, 13 insertions(+), 4 deletions(-)
>
> diff --git a/tests/i915/gem_eio.c b/tests/i915/gem_eio.c
> index bd5266104..ac85a2eff 100644
> --- a/tests/i915/gem_eio.c
> +++ b/tests/i915/gem_eio.c
> @@ -259,7 +259,7 @@ static void check_wait(int fd, uint32_t bo, unsigned int wait, igt_stats_t *st)
>  
>  static void check_wait_elapsed(int fd, igt_stats_t *st)
>  {
> -	double med, max;
> +	double med, max, limit;
>  
>  	igt_info("Completed %d resets, wakeups took %.3f+-%.3fms (min:%.3fms, median:%.3fms, max:%.3fms)\n",
>  		 st->n_values,
> @@ -272,15 +272,24 @@ static void check_wait_elapsed(int fd, igt_stats_t *st)
>  	if (st->n_values < 9)
>  		return; /* too few for stable median */
>  
> +	/*
> +	 * Older platforms need to reset the display (incl. modeset to off,
> +	 * modeset back on) around resets, so may take a lot longer.
> +	 */
> +	limit = 250e6;
> +	if (intel_gen(intel_get_drm_devid(fd)) < 5)
> +		limit += 300e6; /* guestimate for 2x worstcase modeset */

Don't have any insight to offer for the hard cap. 550ms sounds
reasonable!

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> +
>  	med = igt_stats_get_median(st);
>  	max = igt_stats_get_max(st);
> -	igt_assert_f(med < 250e6 && max < 1250e6,
> -		     "Wake up following reset+wedge took %.3f+-%.3fms (min:%.3fms, median:%.3fms, max:%.3fms)\n",
> +	igt_assert_f(med < limit && max < 5 * limit,
> +		     "Wake up following reset+wedge took %.3f+-%.3fms (min:%.3fms, median:%.3fms, max:%.3fms); limit set to %.0fms on average and %.0fms maximum\n",
>  		     igt_stats_get_mean(st)*1e-6,
>  		     igt_stats_get_std_deviation(st)*1e-6,
>  		     igt_stats_get_min(st)*1e-6,
>  		     igt_stats_get_median(st)*1e-6,
> -		     igt_stats_get_max(st)*1e-6);
> +		     igt_stats_get_max(st)*1e-6,
> +		     limit*1e-6, limit*5e-6);
>  }
>  
>  static void __test_banned(int fd)
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [igt-dev] [Intel-gfx] [PATCH i-g-t 1/2] i915/gem_eio: Check average reset times
  2019-02-15 14:40 ` [Intel-gfx] [PATCH i-g-t 1/2] " Mika Kuoppala
@ 2019-02-15 14:52   ` Chris Wilson
  2019-02-15 15:02     ` Mika Kuoppala via igt-dev
  0 siblings, 1 reply; 8+ messages in thread
From: Chris Wilson @ 2019-02-15 14:52 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx; +Cc: igt-dev

Quoting Mika Kuoppala (2019-02-15 14:40:30)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > As we have moved to rcu/srcu to serialise the resets, individual resets
> > are subject to small variations in system grace periods. Allow for this
> > by only expecting the median reset time to be within our target, thereby
> > excluding noisy outliers from perturbing our results (but keep the
> > maximum capped to prevent horrid failures!)
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  tests/i915/gem_eio.c | 53 ++++++++++++++++++++++++++++++++++----------
> >  1 file changed, 41 insertions(+), 12 deletions(-)
> >
> > diff --git a/tests/i915/gem_eio.c b/tests/i915/gem_eio.c
> > index 61054a07e..bd5266104 100644
> > --- a/tests/i915/gem_eio.c
> > +++ b/tests/i915/gem_eio.c
> > @@ -42,6 +42,7 @@
> >  
> >  #include "igt.h"
> >  #include "igt_device.h"
> > +#include "igt_stats.h"
> >  #include "igt_sysfs.h"
> >  #include "sw_sync.h"
> >  #include "i915/gem_ring.h"
> > @@ -239,10 +240,9 @@ static void hang_after(int fd, unsigned int us, struct timespec *ts)
> >       igt_assert_eq(timer_settime(ctx->timer, 0, &its, NULL), 0);
> >  }
> >  
> > -static void check_wait(int fd, uint32_t bo, unsigned int wait)
> > +static void check_wait(int fd, uint32_t bo, unsigned int wait, igt_stats_t *st)
> >  {
> >       struct timespec ts = {};
> > -     uint64_t elapsed;
> >  
> >       if (wait) {
> >               hang_after(fd, wait, &ts);
> > @@ -253,10 +253,34 @@ static void check_wait(int fd, uint32_t bo, unsigned int wait)
> >  
> >       gem_sync(fd, bo);
> >  
> > -     elapsed = igt_nsec_elapsed(&ts);
> > -     igt_assert_f(elapsed < 250e6,
> > -                  "Wake up following reset+wedge took %.3fms\n",
> > -                  elapsed*1e-6);
> > +     if (st)
> > +             igt_stats_push(st, igt_nsec_elapsed(&ts));
> 
> You prolly want igt_nsec_elapsed() primed regardless
> of wait, otherwise I see -1 pushed polluting the stats.

It is...

if (wait)
	hang_after() sets ts start point
else
	we set ts start point.
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [igt-dev] [Intel-gfx] [PATCH i-g-t 1/2] i915/gem_eio: Check average reset times
  2019-02-15 14:52   ` [igt-dev] " Chris Wilson
@ 2019-02-15 15:02     ` Mika Kuoppala via igt-dev
  0 siblings, 0 replies; 8+ messages in thread
From: Mika Kuoppala via igt-dev @ 2019-02-15 15:02 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2019-02-15 14:40:30)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > As we have moved to rcu/srcu to serialise the resets, individual resets
>> > are subject to small variations in system grace periods. Allow for this
>> > by only expecting the median reset time to be within our target, thereby
>> > excluding noisy outliers from perturbing our results (but keep the
>> > maximum capped to prevent horrid failures!)
>> >
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > ---
>> >  tests/i915/gem_eio.c | 53 ++++++++++++++++++++++++++++++++++----------
>> >  1 file changed, 41 insertions(+), 12 deletions(-)
>> >
>> > diff --git a/tests/i915/gem_eio.c b/tests/i915/gem_eio.c
>> > index 61054a07e..bd5266104 100644
>> > --- a/tests/i915/gem_eio.c
>> > +++ b/tests/i915/gem_eio.c
>> > @@ -42,6 +42,7 @@
>> >  
>> >  #include "igt.h"
>> >  #include "igt_device.h"
>> > +#include "igt_stats.h"
>> >  #include "igt_sysfs.h"
>> >  #include "sw_sync.h"
>> >  #include "i915/gem_ring.h"
>> > @@ -239,10 +240,9 @@ static void hang_after(int fd, unsigned int us, struct timespec *ts)
>> >       igt_assert_eq(timer_settime(ctx->timer, 0, &its, NULL), 0);
>> >  }
>> >  
>> > -static void check_wait(int fd, uint32_t bo, unsigned int wait)
>> > +static void check_wait(int fd, uint32_t bo, unsigned int wait, igt_stats_t *st)
>> >  {
>> >       struct timespec ts = {};
>> > -     uint64_t elapsed;
>> >  
>> >       if (wait) {
>> >               hang_after(fd, wait, &ts);
>> > @@ -253,10 +253,34 @@ static void check_wait(int fd, uint32_t bo, unsigned int wait)
>> >  
>> >       gem_sync(fd, bo);
>> >  
>> > -     elapsed = igt_nsec_elapsed(&ts);
>> > -     igt_assert_f(elapsed < 250e6,
>> > -                  "Wake up following reset+wedge took %.3fms\n",
>> > -                  elapsed*1e-6);
>> > +     if (st)
>> > +             igt_stats_push(st, igt_nsec_elapsed(&ts));
>> 
>> You prolly want igt_nsec_elapsed() primed regardless
>> of wait, otherwise I see -1 pushed polluting the stats.
>
> It is...
>
> if (wait)
> 	hang_after() sets ts start point
> else
> 	we set ts start point.

Indeed. It would have helped to read the
parameters for hang_after.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2019-02-15 15:02 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-02-12 20:57 [igt-dev] [PATCH i-g-t 1/2] i915/gem_eio: Check average reset times Chris Wilson
2019-02-12 20:57 ` [igt-dev] [PATCH i-g-t 2/2] i915/gem_eio: Allow older platforms more leniency in reset latency Chris Wilson
2019-02-15 14:43   ` [Intel-gfx] " Mika Kuoppala
2019-02-12 21:22 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2] i915/gem_eio: Check average reset times Patchwork
2019-02-12 23:12 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
2019-02-15 14:40 ` [Intel-gfx] [PATCH i-g-t 1/2] " Mika Kuoppala
2019-02-15 14:52   ` [igt-dev] " Chris Wilson
2019-02-15 15:02     ` Mika Kuoppala via igt-dev

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox