* [igt-dev] [PATCH i-g-t] igt/gem_eio: Measure reset delay from thread
@ 2018-07-27 11:13 Chris Wilson
2018-07-27 13:24 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
` (4 more replies)
0 siblings, 5 replies; 10+ messages in thread
From: Chris Wilson @ 2018-07-27 11:13 UTC (permalink / raw)
To: intel-gfx; +Cc: igt-dev
We assert that we complete a wedge within 250ms. However, when we use a
thread to delay the wedging until after we start waiting, that thread
itself is delayed longer than our wait timeout. This results in a false
positive error where we fail the test before we even trigger the reset.
Reorder the test so that we only ever measure the delay from triggering
the reset until we wakeup, and assert that is in a timely fashion
(less than 250ms).
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105954
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
tests/gem_eio.c | 45 ++++++++++++++++++++++++---------------------
1 file changed, 24 insertions(+), 21 deletions(-)
diff --git a/tests/gem_eio.c b/tests/gem_eio.c
index 3162a3170..b26b4b895 100644
--- a/tests/gem_eio.c
+++ b/tests/gem_eio.c
@@ -190,7 +190,8 @@ static igt_spin_t * spin_sync(int fd, uint32_t ctx, unsigned long flags)
struct hang_ctx {
int debugfs;
- struct timespec ts;
+ struct timespec delay;
+ struct timespec *ts;
timer_t timer;
};
@@ -198,8 +199,10 @@ static void hang_handler(union sigval arg)
{
struct hang_ctx *ctx = arg.sival_ptr;
- igt_debug("hang delay = %.2fus\n", igt_nsec_elapsed(&ctx->ts) / 1000.0);
+ igt_debug("hang delay = %.2fus\n",
+ igt_nsec_elapsed(&ctx->delay) / 1000.0);
+ igt_nsec_elapsed(ctx->ts);
igt_assert(igt_sysfs_set(ctx->debugfs, "i915_wedged", "-1"));
igt_assert_eq(timer_delete(ctx->timer), 0);
@@ -207,7 +210,7 @@ static void hang_handler(union sigval arg)
free(ctx);
}
-static void hang_after(int fd, unsigned int us)
+static void hang_after(int fd, unsigned int us, struct timespec *ts)
{
struct sigevent sev = {
.sigev_notify = SIGEV_THREAD,
@@ -229,30 +232,30 @@ static void hang_after(int fd, unsigned int us)
igt_assert_eq(timer_create(CLOCK_MONOTONIC, &sev, &ctx->timer), 0);
- igt_nsec_elapsed(&ctx->ts);
+ ctx->ts = ts;
+ igt_nsec_elapsed(&ctx->delay);
igt_assert_eq(timer_settime(ctx->timer, 0, &its, NULL), 0);
}
-static int __check_wait(int fd, uint32_t bo, unsigned int wait)
+static void __check_wait(int fd, uint32_t bo, unsigned int wait)
{
- unsigned long wait_timeout = 250e6; /* Some margin for actual reset. */
- int ret;
+ struct timespec ts = {};
+ uint64_t elapsed;
if (wait) {
- /*
- * Double the wait plus some fixed margin to ensure gem_wait
- * can never time out before the async hang runs.
- */
- wait_timeout += wait * 2000 + 250e6;
- hang_after(fd, wait);
+ hang_after(fd, wait, &ts);
} else {
+ igt_nsec_elapsed(&ts);
manual_hang(fd);
}
- ret = __gem_wait(fd, bo, wait_timeout);
+ gem_sync(fd, bo);
- return ret;
+ elapsed = igt_nsec_elapsed(&ts);
+ igt_assert_f(elapsed < 250e6,
+ "Wake up following reset+wedge took %.3fms\n",
+ elapsed*1e-6);
}
static void __test_banned(int fd)
@@ -322,7 +325,7 @@ static void test_wait(int fd, unsigned int flags, unsigned int wait)
hang = spin_sync(fd, 0, I915_EXEC_DEFAULT);
- igt_assert_eq(__check_wait(fd, hang->handle, wait), 0);
+ __check_wait(fd, hang->handle, wait);
igt_spin_batch_free(fd, hang);
@@ -392,7 +395,7 @@ static void test_inflight(int fd, unsigned int wait)
igt_assert(fence[n] != -1);
}
- igt_assert_eq(__check_wait(fd, obj[1].handle, wait), 0);
+ __check_wait(fd, obj[1].handle, wait);
for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) {
igt_assert_eq(sync_fence_status(fence[n]), -EIO);
@@ -443,7 +446,7 @@ static void test_inflight_suspend(int fd)
igt_set_autoresume_delay(30);
igt_system_suspend_autoresume(SUSPEND_STATE_MEM, SUSPEND_TEST_NONE);
- igt_assert_eq(__check_wait(fd, obj[1].handle, 10), 0);
+ __check_wait(fd, obj[1].handle, 10);
for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) {
igt_assert_eq(sync_fence_status(fence[n]), -EIO);
@@ -521,7 +524,7 @@ static void test_inflight_contexts(int fd, unsigned int wait)
igt_assert(fence[n] != -1);
}
- igt_assert_eq(__check_wait(fd, obj[1].handle, wait), 0);
+ __check_wait(fd, obj[1].handle, wait);
for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) {
igt_assert_eq(sync_fence_status(fence[n]), -EIO);
@@ -630,7 +633,7 @@ static void test_inflight_internal(int fd, unsigned int wait)
nfence++;
}
- igt_assert_eq(__check_wait(fd, obj[1].handle, wait), 0);
+ __check_wait(fd, obj[1].handle, wait);
while (nfence--) {
igt_assert_eq(sync_fence_status(fences[nfence]), -EIO);
@@ -682,7 +685,7 @@ static void test_reset_stress(int fd, unsigned int flags)
gem_execbuf(fd, &execbuf);
/* Wedge after a small delay. */
- igt_assert_eq(__check_wait(fd, obj.handle, 100e3), 0);
+ __check_wait(fd, obj.handle, 100e3);
/* Unwedge by forcing a reset. */
igt_assert(i915_reset_control(true));
--
2.18.0
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply related [flat|nested] 10+ messages in thread* [igt-dev] ✓ Fi.CI.BAT: success for igt/gem_eio: Measure reset delay from thread 2018-07-27 11:13 [igt-dev] [PATCH i-g-t] igt/gem_eio: Measure reset delay from thread Chris Wilson @ 2018-07-27 13:24 ` Patchwork 2018-07-27 14:52 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork ` (3 subsequent siblings) 4 siblings, 0 replies; 10+ messages in thread From: Patchwork @ 2018-07-27 13:24 UTC (permalink / raw) To: Chris Wilson; +Cc: igt-dev == Series Details == Series: igt/gem_eio: Measure reset delay from thread URL : https://patchwork.freedesktop.org/series/47338/ State : success == Summary == = CI Bug Log - changes from CI_DRM_4560 -> IGTPW_1662 = == Summary - SUCCESS == No regressions found. External URL: https://patchwork.freedesktop.org/api/1.0/series/47338/revisions/1/mbox/ == Known issues == Here are the changes found in IGTPW_1662 that come from known issues: === IGT changes === ==== Issues hit ==== igt@kms_frontbuffer_tracking@basic: fi-kbl-r: PASS -> DMESG-WARN (fdo#105602) igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b: fi-snb-2520m: PASS -> INCOMPLETE (fdo#103713) igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c: fi-bxt-dsi: PASS -> INCOMPLETE (fdo#103927) ==== Possible fixes ==== igt@kms_chamelium@dp-crc-fast: fi-kbl-7500u: DMESG-FAIL (fdo#103841) -> PASS {igt@kms_psr@primary_mmap_gtt}: fi-cnl-psr: DMESG-WARN (fdo#107372) -> PASS {name}: This element is suppressed. This means it is ignored when computing the status of the difference (SUCCESS, WARNING, or FAILURE). fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713 fdo#103841 https://bugs.freedesktop.org/show_bug.cgi?id=103841 fdo#103927 https://bugs.freedesktop.org/show_bug.cgi?id=103927 fdo#105602 https://bugs.freedesktop.org/show_bug.cgi?id=105602 fdo#107372 https://bugs.freedesktop.org/show_bug.cgi?id=107372 == Participating hosts (51 -> 46) == Additional (1): fi-skl-guc Missing (6): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-icl-u fi-byt-clapper == Build changes == * IGT: IGT_4576 -> IGTPW_1662 CI_DRM_4560: b73c0ddef408783e556741ac9d3679b7d153e3e1 @ git://anongit.freedesktop.org/gfx-ci/linux IGTPW_1662: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1662/ IGT_4576: bcb37a9b20eeec97f15fac2222408cc2e0b77631 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1662/issues.html _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply [flat|nested] 10+ messages in thread
* [igt-dev] ✗ Fi.CI.IGT: failure for igt/gem_eio: Measure reset delay from thread 2018-07-27 11:13 [igt-dev] [PATCH i-g-t] igt/gem_eio: Measure reset delay from thread Chris Wilson 2018-07-27 13:24 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork @ 2018-07-27 14:52 ` Patchwork 2018-07-27 15:16 ` Chris Wilson 2018-07-30 16:20 ` [igt-dev] ✓ Fi.CI.BAT: success " Patchwork ` (2 subsequent siblings) 4 siblings, 1 reply; 10+ messages in thread From: Patchwork @ 2018-07-27 14:52 UTC (permalink / raw) To: Chris Wilson; +Cc: igt-dev == Series Details == Series: igt/gem_eio: Measure reset delay from thread URL : https://patchwork.freedesktop.org/series/47338/ State : failure == Summary == = CI Bug Log - changes from IGT_4576_full -> IGTPW_1662_full = == Summary - FAILURE == Serious unknown changes coming with IGTPW_1662_full absolutely need to be verified manually. If you think the reported changes have nothing to do with the changes introduced in IGTPW_1662_full, please notify your bug team to allow them to document this new failure mode, which will reduce false positives in CI. External URL: https://patchwork.freedesktop.org/api/1.0/series/47338/revisions/1/mbox/ == Possible new issues == Here are the unknown changes that may have been introduced in IGTPW_1662_full: === IGT changes === ==== Possible regressions ==== igt@gem_eio@reset-stress: shard-hsw: PASS -> FAIL +2 igt@gem_eio@wait-10ms: shard-snb: PASS -> FAIL ==== Warnings ==== igt@perf_pmu@rc6: shard-kbl: PASS -> SKIP == Known issues == Here are the changes found in IGTPW_1662_full that come from known issues: === IGT changes === ==== Issues hit ==== igt@kms_cursor_crc@cursor-256x256-suspend: shard-apl: PASS -> FAIL (fdo#103375) igt@kms_cursor_crc@cursor-64x64-suspend: shard-snb: PASS -> INCOMPLETE (fdo#105411) +1 igt@pm_rpm@modeset-non-lpsp-stress: shard-kbl: PASS -> FAIL (fdo#106539) shard-hsw: PASS -> FAIL (fdo#106539) shard-glk: PASS -> FAIL (fdo#106539) shard-apl: PASS -> FAIL (fdo#106539) ==== Possible fixes ==== igt@gem_softpin@evict-active-interruptible: shard-snb: INCOMPLETE (fdo#105411) -> PASS igt@kms_draw_crc@draw-method-xrgb8888-pwrite-ytiled: shard-glk: FAIL (fdo#107401) -> PASS igt@kms_flip@2x-plain-flip-fb-recreate: shard-glk: FAIL (fdo#100368) -> PASS igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes: shard-kbl: INCOMPLETE (fdo#103665) -> PASS igt@kms_setmode@basic: shard-apl: FAIL (fdo#99912) -> PASS igt@kms_vblank@pipe-b-ts-continuation-dpms-suspend: shard-glk: FAIL (fdo#103375) -> PASS igt@prime_vgem@basic-fence-flip: shard-kbl: DMESG-WARN (fdo#106247) -> PASS fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368 fdo#103375 https://bugs.freedesktop.org/show_bug.cgi?id=103375 fdo#103665 https://bugs.freedesktop.org/show_bug.cgi?id=103665 fdo#105411 https://bugs.freedesktop.org/show_bug.cgi?id=105411 fdo#106247 https://bugs.freedesktop.org/show_bug.cgi?id=106247 fdo#106539 https://bugs.freedesktop.org/show_bug.cgi?id=106539 fdo#107401 https://bugs.freedesktop.org/show_bug.cgi?id=107401 fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912 == Participating hosts (5 -> 5) == No changes in participating hosts == Build changes == * IGT: IGT_4576 -> IGTPW_1662 * Linux: CI_DRM_4547 -> CI_DRM_4560 CI_DRM_4547: 0a7ab192a697e951b2404f3c1ce42c5fa74f9ed1 @ git://anongit.freedesktop.org/gfx-ci/linux CI_DRM_4560: b73c0ddef408783e556741ac9d3679b7d153e3e1 @ git://anongit.freedesktop.org/gfx-ci/linux IGTPW_1662: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1662/ IGT_4576: bcb37a9b20eeec97f15fac2222408cc2e0b77631 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1662/shards.html _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [igt-dev] ✗ Fi.CI.IGT: failure for igt/gem_eio: Measure reset delay from thread 2018-07-27 14:52 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork @ 2018-07-27 15:16 ` Chris Wilson 2018-07-27 18:44 ` Chris Wilson 0 siblings, 1 reply; 10+ messages in thread From: Chris Wilson @ 2018-07-27 15:16 UTC (permalink / raw) To: Patchwork, igt-dev Quoting Patchwork (2018-07-27 15:52:37) > == Series Details == > > Series: igt/gem_eio: Measure reset delay from thread > URL : https://patchwork.freedesktop.org/series/47338/ > State : failure > > == Summary == > > = CI Bug Log - changes from IGT_4576_full -> IGTPW_1662_full = > > == Summary - FAILURE == > > Serious unknown changes coming with IGTPW_1662_full absolutely need to be > verified manually. > > If you think the reported changes have nothing to do with the changes > introduced in IGTPW_1662_full, please notify your bug team to allow them > to document this new failure mode, which will reduce false positives in CI. > > External URL: https://patchwork.freedesktop.org/api/1.0/series/47338/revisions/1/mbox/ > > == Possible new issues == > > Here are the unknown changes that may have been introduced in IGTPW_1662_full: > > === IGT changes === > > ==== Possible regressions ==== > > igt@gem_eio@reset-stress: > shard-hsw: PASS -> FAIL +2 > > igt@gem_eio@wait-10ms: > shard-snb: PASS -> FAIL > > > ==== Warnings ==== > > igt@perf_pmu@rc6: > shard-kbl: PASS -> SKIP > > > == Known issues == > > Here are the changes found in IGTPW_1662_full that come from known issues: > > === IGT changes === > > ==== Issues hit ==== > > igt@kms_cursor_crc@cursor-256x256-suspend: > shard-apl: PASS -> FAIL (fdo#103375) > > igt@kms_cursor_crc@cursor-64x64-suspend: > shard-snb: PASS -> INCOMPLETE (fdo#105411) +1 > > igt@pm_rpm@modeset-non-lpsp-stress: > shard-kbl: PASS -> FAIL (fdo#106539) > shard-hsw: PASS -> FAIL (fdo#106539) > shard-glk: PASS -> FAIL (fdo#106539) > shard-apl: PASS -> FAIL (fdo#106539) Hmm, eye opening. We've found a genuine bug, missing the wakeup. -Chris _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [igt-dev] ✗ Fi.CI.IGT: failure for igt/gem_eio: Measure reset delay from thread 2018-07-27 15:16 ` Chris Wilson @ 2018-07-27 18:44 ` Chris Wilson 0 siblings, 0 replies; 10+ messages in thread From: Chris Wilson @ 2018-07-27 18:44 UTC (permalink / raw) To: Patchwork, igt-dev Quoting Chris Wilson (2018-07-27 16:16:32) > > == Known issues == > > > > Here are the changes found in IGTPW_1662_full that come from known issues: > > > > === IGT changes === > > > > ==== Issues hit ==== > > > > igt@kms_cursor_crc@cursor-256x256-suspend: > > shard-apl: PASS -> FAIL (fdo#103375) > > > > igt@kms_cursor_crc@cursor-64x64-suspend: > > shard-snb: PASS -> INCOMPLETE (fdo#105411) +1 > > > > igt@pm_rpm@modeset-non-lpsp-stress: > > shard-kbl: PASS -> FAIL (fdo#106539) > > shard-hsw: PASS -> FAIL (fdo#106539) > > shard-glk: PASS -> FAIL (fdo#106539) > > shard-apl: PASS -> FAIL (fdo#106539) > > Hmm, eye opening. We've found a genuine bug, missing the wakeup. Band aid sent. -Chris _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply [flat|nested] 10+ messages in thread
* [igt-dev] ✓ Fi.CI.BAT: success for igt/gem_eio: Measure reset delay from thread 2018-07-27 11:13 [igt-dev] [PATCH i-g-t] igt/gem_eio: Measure reset delay from thread Chris Wilson 2018-07-27 13:24 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork 2018-07-27 14:52 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork @ 2018-07-30 16:20 ` Patchwork 2018-07-30 18:29 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork 2018-08-03 14:03 ` [igt-dev] [PATCH i-g-t] " Tvrtko Ursulin 4 siblings, 0 replies; 10+ messages in thread From: Patchwork @ 2018-07-30 16:20 UTC (permalink / raw) To: Chris Wilson; +Cc: igt-dev == Series Details == Series: igt/gem_eio: Measure reset delay from thread URL : https://patchwork.freedesktop.org/series/47338/ State : success == Summary == = CI Bug Log - changes from CI_DRM_4592 -> IGTPW_1671 = == Summary - SUCCESS == No regressions found. External URL: https://patchwork.freedesktop.org/api/1.0/series/47338/revisions/1/mbox/ == Known issues == Here are the changes found in IGTPW_1671 that come from known issues: === IGT changes === ==== Issues hit ==== igt@drv_module_reload@basic-reload-inject: fi-hsw-4770r: PASS -> DMESG-WARN (fdo#107425) igt@kms_flip@basic-flip-vs-dpms: fi-skl-6700hq: PASS -> DMESG-WARN (fdo#105998) igt@prime_vgem@basic-fence-flip: fi-ilk-650: PASS -> FAIL (fdo#104008) ==== Warnings ==== {igt@kms_psr@primary_page_flip}: fi-cnl-psr: DMESG-WARN (fdo#107372) -> DMESG-FAIL (fdo#107372) {name}: This element is suppressed. This means it is ignored when computing the status of the difference (SUCCESS, WARNING, or FAILURE). fdo#104008 https://bugs.freedesktop.org/show_bug.cgi?id=104008 fdo#105998 https://bugs.freedesktop.org/show_bug.cgi?id=105998 fdo#107372 https://bugs.freedesktop.org/show_bug.cgi?id=107372 fdo#107425 https://bugs.freedesktop.org/show_bug.cgi?id=107425 == Participating hosts (52 -> 47) == Additional (1): fi-kbl-8809g Missing (6): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper == Build changes == * IGT: IGT_4580 -> IGTPW_1671 CI_DRM_4592: ed4b6c987889caf5a4041cf78eacdede85964577 @ git://anongit.freedesktop.org/gfx-ci/linux IGTPW_1671: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1671/ IGT_4580: f1c868dae24056ebc27e4f3c197724ce9b956a8a @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1671/issues.html _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply [flat|nested] 10+ messages in thread
* [igt-dev] ✓ Fi.CI.IGT: success for igt/gem_eio: Measure reset delay from thread 2018-07-27 11:13 [igt-dev] [PATCH i-g-t] igt/gem_eio: Measure reset delay from thread Chris Wilson ` (2 preceding siblings ...) 2018-07-30 16:20 ` [igt-dev] ✓ Fi.CI.BAT: success " Patchwork @ 2018-07-30 18:29 ` Patchwork 2018-07-30 18:33 ` Chris Wilson 2018-08-03 14:03 ` [igt-dev] [PATCH i-g-t] " Tvrtko Ursulin 4 siblings, 1 reply; 10+ messages in thread From: Patchwork @ 2018-07-30 18:29 UTC (permalink / raw) To: Chris Wilson; +Cc: igt-dev == Series Details == Series: igt/gem_eio: Measure reset delay from thread URL : https://patchwork.freedesktop.org/series/47338/ State : success == Summary == = CI Bug Log - changes from IGT_4580_full -> IGTPW_1671_full = == Summary - WARNING == Minor unknown changes coming with IGTPW_1671_full need to be verified manually. If you think the reported changes have nothing to do with the changes introduced in IGTPW_1671_full, please notify your bug team to allow them to document this new failure mode, which will reduce false positives in CI. External URL: https://patchwork.freedesktop.org/api/1.0/series/47338/revisions/1/mbox/ == Possible new issues == Here are the unknown changes that may have been introduced in IGTPW_1671_full: === IGT changes === ==== Warnings ==== igt@kms_vblank@pipe-b-accuracy-idle: shard-snb: PASS -> SKIP +1 igt@pm_rc6_residency@rc6-accuracy: shard-kbl: PASS -> SKIP shard-snb: SKIP -> PASS == Known issues == Here are the changes found in IGTPW_1671_full that come from known issues: === IGT changes === ==== Issues hit ==== igt@gem_eio@wait-immediate: shard-snb: PASS -> INCOMPLETE (fdo#105411) igt@gem_ppgtt@blt-vs-render-ctx0: shard-kbl: PASS -> INCOMPLETE (fdo#103665, fdo#106023) igt@kms_frontbuffer_tracking@fbc-1p-offscren-pri-indfb-draw-blt: shard-glk: PASS -> FAIL (fdo#103167) igt@kms_setmode@basic: shard-apl: PASS -> FAIL (fdo#99912) igt@kms_vblank@pipe-c-ts-continuation-suspend: shard-kbl: PASS -> INCOMPLETE (fdo#103665) igt@testdisplay: shard-glk: PASS -> INCOMPLETE (k.org#198133, fdo#107093, fdo#103359) ==== Possible fixes ==== igt@kms_flip@plain-flip-fb-recreate-interruptible: shard-glk: FAIL (fdo#100368) -> PASS igt@kms_plane_multiple@atomic-pipe-a-tiling-x: shard-snb: FAIL (fdo#103166) -> PASS igt@kms_rotation_crc@sprite-rotation-180: shard-snb: FAIL (fdo#103925) -> PASS +1 fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368 fdo#103166 https://bugs.freedesktop.org/show_bug.cgi?id=103166 fdo#103167 https://bugs.freedesktop.org/show_bug.cgi?id=103167 fdo#103359 https://bugs.freedesktop.org/show_bug.cgi?id=103359 fdo#103665 https://bugs.freedesktop.org/show_bug.cgi?id=103665 fdo#103925 https://bugs.freedesktop.org/show_bug.cgi?id=103925 fdo#105411 https://bugs.freedesktop.org/show_bug.cgi?id=105411 fdo#106023 https://bugs.freedesktop.org/show_bug.cgi?id=106023 fdo#107093 https://bugs.freedesktop.org/show_bug.cgi?id=107093 fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912 k.org#198133 https://bugzilla.kernel.org/show_bug.cgi?id=198133 == Participating hosts (5 -> 5) == No changes in participating hosts == Build changes == * IGT: IGT_4580 -> IGTPW_1671 * Linux: CI_DRM_4562 -> CI_DRM_4592 CI_DRM_4562: 99bbd80f75cdcf28699ffd3c93a714ca4a89b962 @ git://anongit.freedesktop.org/gfx-ci/linux CI_DRM_4592: ed4b6c987889caf5a4041cf78eacdede85964577 @ git://anongit.freedesktop.org/gfx-ci/linux IGTPW_1671: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1671/ IGT_4580: f1c868dae24056ebc27e4f3c197724ce9b956a8a @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1671/shards.html _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [igt-dev] ✓ Fi.CI.IGT: success for igt/gem_eio: Measure reset delay from thread 2018-07-30 18:29 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork @ 2018-07-30 18:33 ` Chris Wilson 0 siblings, 0 replies; 10+ messages in thread From: Chris Wilson @ 2018-07-30 18:33 UTC (permalink / raw) To: Patchwork, igt-dev Quoting Patchwork (2018-07-30 19:29:16) > igt@gem_eio@wait-immediate: > shard-snb: PASS -> INCOMPLETE (fdo#105411) > No logs, not even on the paths affected by the patch. Please review kindly. -Chris _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [igt-dev] [PATCH i-g-t] igt/gem_eio: Measure reset delay from thread 2018-07-27 11:13 [igt-dev] [PATCH i-g-t] igt/gem_eio: Measure reset delay from thread Chris Wilson ` (3 preceding siblings ...) 2018-07-30 18:29 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork @ 2018-08-03 14:03 ` Tvrtko Ursulin 2018-08-03 14:10 ` Chris Wilson 4 siblings, 1 reply; 10+ messages in thread From: Tvrtko Ursulin @ 2018-08-03 14:03 UTC (permalink / raw) To: Chris Wilson, intel-gfx; +Cc: igt-dev On 27/07/2018 12:13, Chris Wilson wrote: > We assert that we complete a wedge within 250ms. However, when we use a > thread to delay the wedging until after we start waiting, that thread > itself is delayed longer than our wait timeout. This results in a false > positive error where we fail the test before we even trigger the reset. > > Reorder the test so that we only ever measure the delay from triggering > the reset until we wakeup, and assert that is in a timely fashion > (less than 250ms). > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105954 > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > tests/gem_eio.c | 45 ++++++++++++++++++++++++--------------------- > 1 file changed, 24 insertions(+), 21 deletions(-) > > diff --git a/tests/gem_eio.c b/tests/gem_eio.c > index 3162a3170..b26b4b895 100644 > --- a/tests/gem_eio.c > +++ b/tests/gem_eio.c > @@ -190,7 +190,8 @@ static igt_spin_t * spin_sync(int fd, uint32_t ctx, unsigned long flags) > > struct hang_ctx { > int debugfs; > - struct timespec ts; > + struct timespec delay; > + struct timespec *ts; > timer_t timer; > }; > > @@ -198,8 +199,10 @@ static void hang_handler(union sigval arg) > { > struct hang_ctx *ctx = arg.sival_ptr; > > - igt_debug("hang delay = %.2fus\n", igt_nsec_elapsed(&ctx->ts) / 1000.0); > + igt_debug("hang delay = %.2fus\n", > + igt_nsec_elapsed(&ctx->delay) / 1000.0); > > + igt_nsec_elapsed(ctx->ts); > igt_assert(igt_sysfs_set(ctx->debugfs, "i915_wedged", "-1")); > > igt_assert_eq(timer_delete(ctx->timer), 0); > @@ -207,7 +210,7 @@ static void hang_handler(union sigval arg) > free(ctx); > } > > -static void hang_after(int fd, unsigned int us) > +static void hang_after(int fd, unsigned int us, struct timespec *ts) > { > struct sigevent sev = { > .sigev_notify = SIGEV_THREAD, > @@ -229,30 +232,30 @@ static void hang_after(int fd, unsigned int us) > > igt_assert_eq(timer_create(CLOCK_MONOTONIC, &sev, &ctx->timer), 0); > > - igt_nsec_elapsed(&ctx->ts); > + ctx->ts = ts; > + igt_nsec_elapsed(&ctx->delay); > > igt_assert_eq(timer_settime(ctx->timer, 0, &its, NULL), 0); > } > > -static int __check_wait(int fd, uint32_t bo, unsigned int wait) > +static void __check_wait(int fd, uint32_t bo, unsigned int wait) > { > - unsigned long wait_timeout = 250e6; /* Some margin for actual reset. */ > - int ret; > + struct timespec ts = {}; > + uint64_t elapsed; > > if (wait) { > - /* > - * Double the wait plus some fixed margin to ensure gem_wait > - * can never time out before the async hang runs. > - */ > - wait_timeout += wait * 2000 + 250e6; > - hang_after(fd, wait); > + hang_after(fd, wait, &ts); > } else { > + igt_nsec_elapsed(&ts); > manual_hang(fd); > } > > - ret = __gem_wait(fd, bo, wait_timeout); > + gem_sync(fd, bo); One drawback I can see is if reset fails to work then we hang until IGT runner timeout. Alternative would be a smaller patch to bump the wait delay which should work given how rare are the failures. It was one of the goals of the gem_eio rewrite I think, but whatever, I suppose runtime of test when reset is broken is not that important. Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko > > - return ret; > + elapsed = igt_nsec_elapsed(&ts); > + igt_assert_f(elapsed < 250e6, > + "Wake up following reset+wedge took %.3fms\n", > + elapsed*1e-6); > } > > static void __test_banned(int fd) > @@ -322,7 +325,7 @@ static void test_wait(int fd, unsigned int flags, unsigned int wait) > > hang = spin_sync(fd, 0, I915_EXEC_DEFAULT); > > - igt_assert_eq(__check_wait(fd, hang->handle, wait), 0); > + __check_wait(fd, hang->handle, wait); > > igt_spin_batch_free(fd, hang); > > @@ -392,7 +395,7 @@ static void test_inflight(int fd, unsigned int wait) > igt_assert(fence[n] != -1); > } > > - igt_assert_eq(__check_wait(fd, obj[1].handle, wait), 0); > + __check_wait(fd, obj[1].handle, wait); > > for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) { > igt_assert_eq(sync_fence_status(fence[n]), -EIO); > @@ -443,7 +446,7 @@ static void test_inflight_suspend(int fd) > igt_set_autoresume_delay(30); > igt_system_suspend_autoresume(SUSPEND_STATE_MEM, SUSPEND_TEST_NONE); > > - igt_assert_eq(__check_wait(fd, obj[1].handle, 10), 0); > + __check_wait(fd, obj[1].handle, 10); > > for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) { > igt_assert_eq(sync_fence_status(fence[n]), -EIO); > @@ -521,7 +524,7 @@ static void test_inflight_contexts(int fd, unsigned int wait) > igt_assert(fence[n] != -1); > } > > - igt_assert_eq(__check_wait(fd, obj[1].handle, wait), 0); > + __check_wait(fd, obj[1].handle, wait); > > for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) { > igt_assert_eq(sync_fence_status(fence[n]), -EIO); > @@ -630,7 +633,7 @@ static void test_inflight_internal(int fd, unsigned int wait) > nfence++; > } > > - igt_assert_eq(__check_wait(fd, obj[1].handle, wait), 0); > + __check_wait(fd, obj[1].handle, wait); > > while (nfence--) { > igt_assert_eq(sync_fence_status(fences[nfence]), -EIO); > @@ -682,7 +685,7 @@ static void test_reset_stress(int fd, unsigned int flags) > gem_execbuf(fd, &execbuf); > > /* Wedge after a small delay. */ > - igt_assert_eq(__check_wait(fd, obj.handle, 100e3), 0); > + __check_wait(fd, obj.handle, 100e3); > > /* Unwedge by forcing a reset. */ > igt_assert(i915_reset_control(true)); > _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [igt-dev] [PATCH i-g-t] igt/gem_eio: Measure reset delay from thread 2018-08-03 14:03 ` [igt-dev] [PATCH i-g-t] " Tvrtko Ursulin @ 2018-08-03 14:10 ` Chris Wilson 0 siblings, 0 replies; 10+ messages in thread From: Chris Wilson @ 2018-08-03 14:10 UTC (permalink / raw) To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev Quoting Tvrtko Ursulin (2018-08-03 15:03:05) > > On 27/07/2018 12:13, Chris Wilson wrote: > > We assert that we complete a wedge within 250ms. However, when we use a > > thread to delay the wedging until after we start waiting, that thread > > itself is delayed longer than our wait timeout. This results in a false > > positive error where we fail the test before we even trigger the reset. > > > > Reorder the test so that we only ever measure the delay from triggering > > the reset until we wakeup, and assert that is in a timely fashion > > (less than 250ms). > > > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105954 > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > --- > > tests/gem_eio.c | 45 ++++++++++++++++++++++++--------------------- > > 1 file changed, 24 insertions(+), 21 deletions(-) > > > > diff --git a/tests/gem_eio.c b/tests/gem_eio.c > > index 3162a3170..b26b4b895 100644 > > --- a/tests/gem_eio.c > > +++ b/tests/gem_eio.c > > @@ -190,7 +190,8 @@ static igt_spin_t * spin_sync(int fd, uint32_t ctx, unsigned long flags) > > > > struct hang_ctx { > > int debugfs; > > - struct timespec ts; > > + struct timespec delay; > > + struct timespec *ts; > > timer_t timer; > > }; > > > > @@ -198,8 +199,10 @@ static void hang_handler(union sigval arg) > > { > > struct hang_ctx *ctx = arg.sival_ptr; > > > > - igt_debug("hang delay = %.2fus\n", igt_nsec_elapsed(&ctx->ts) / 1000.0); > > + igt_debug("hang delay = %.2fus\n", > > + igt_nsec_elapsed(&ctx->delay) / 1000.0); > > > > + igt_nsec_elapsed(ctx->ts); > > igt_assert(igt_sysfs_set(ctx->debugfs, "i915_wedged", "-1")); > > > > igt_assert_eq(timer_delete(ctx->timer), 0); > > @@ -207,7 +210,7 @@ static void hang_handler(union sigval arg) > > free(ctx); > > } > > > > -static void hang_after(int fd, unsigned int us) > > +static void hang_after(int fd, unsigned int us, struct timespec *ts) > > { > > struct sigevent sev = { > > .sigev_notify = SIGEV_THREAD, > > @@ -229,30 +232,30 @@ static void hang_after(int fd, unsigned int us) > > > > igt_assert_eq(timer_create(CLOCK_MONOTONIC, &sev, &ctx->timer), 0); > > > > - igt_nsec_elapsed(&ctx->ts); > > + ctx->ts = ts; > > + igt_nsec_elapsed(&ctx->delay); > > > > igt_assert_eq(timer_settime(ctx->timer, 0, &its, NULL), 0); > > } > > > > -static int __check_wait(int fd, uint32_t bo, unsigned int wait) > > +static void __check_wait(int fd, uint32_t bo, unsigned int wait) > > { > > - unsigned long wait_timeout = 250e6; /* Some margin for actual reset. */ > > - int ret; > > + struct timespec ts = {}; > > + uint64_t elapsed; > > > > if (wait) { > > - /* > > - * Double the wait plus some fixed margin to ensure gem_wait > > - * can never time out before the async hang runs. > > - */ > > - wait_timeout += wait * 2000 + 250e6; > > - hang_after(fd, wait); > > + hang_after(fd, wait, &ts); > > } else { > > + igt_nsec_elapsed(&ts); > > manual_hang(fd); > > } > > > > - ret = __gem_wait(fd, bo, wait_timeout); > > + gem_sync(fd, bo); > > One drawback I can see is if reset fails to work then we hang until IGT > runner timeout. Alternative would be a smaller patch to bump the wait > delay which should work given how rare are the failures. It was one of > the goals of the gem_eio rewrite I think, but whatever, I suppose > runtime of test when reset is broken is not that important. It hard to put an upper bound on just how much the scheduler is going to screw us over. On the other hand, this let's us say precisely which portion must be done in the timely fashion (reset -> wakeup), and for that bound can be invariant. Which I think is nice. The downside is indeed that instead of getting a clear fail with explanation if the reset fails completely (not even able to wedge the device), and that hangcheck itself fails to wedge eventually, then we do rely on the external runner to declare the test broken. I think that's fair, it requires a series of events to fail; and in particular hangcheck is supposed to be guarding against even its own failure. -Chris _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2018-08-03 14:10 UTC | newest] Thread overview: 10+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2018-07-27 11:13 [igt-dev] [PATCH i-g-t] igt/gem_eio: Measure reset delay from thread Chris Wilson 2018-07-27 13:24 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork 2018-07-27 14:52 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork 2018-07-27 15:16 ` Chris Wilson 2018-07-27 18:44 ` Chris Wilson 2018-07-30 16:20 ` [igt-dev] ✓ Fi.CI.BAT: success " Patchwork 2018-07-30 18:29 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork 2018-07-30 18:33 ` Chris Wilson 2018-08-03 14:03 ` [igt-dev] [PATCH i-g-t] " Tvrtko Ursulin 2018-08-03 14:10 ` Chris Wilson
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).