All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Cc: igt-dev@lists.freedesktop.org
Subject: Re: [Intel-gfx] [PATCH i-g-t 1/2] i915/gem_eio: Check average reset times
Date: Fri, 15 Feb 2019 16:40:30 +0200	[thread overview]
Message-ID: <87h8d5yv1d.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <20190212205740.20368-1-chris@chris-wilson.co.uk>

Chris Wilson <chris@chris-wilson.co.uk> writes:

> As we have moved to rcu/srcu to serialise the resets, individual resets
> are subject to small variations in system grace periods. Allow for this
> by only expecting the median reset time to be within our target, thereby
> excluding noisy outliers from perturbing our results (but keep the
> maximum capped to prevent horrid failures!)
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  tests/i915/gem_eio.c | 53 ++++++++++++++++++++++++++++++++++----------
>  1 file changed, 41 insertions(+), 12 deletions(-)
>
> diff --git a/tests/i915/gem_eio.c b/tests/i915/gem_eio.c
> index 61054a07e..bd5266104 100644
> --- a/tests/i915/gem_eio.c
> +++ b/tests/i915/gem_eio.c
> @@ -42,6 +42,7 @@
>  
>  #include "igt.h"
>  #include "igt_device.h"
> +#include "igt_stats.h"
>  #include "igt_sysfs.h"
>  #include "sw_sync.h"
>  #include "i915/gem_ring.h"
> @@ -239,10 +240,9 @@ static void hang_after(int fd, unsigned int us, struct timespec *ts)
>  	igt_assert_eq(timer_settime(ctx->timer, 0, &its, NULL), 0);
>  }
>  
> -static void check_wait(int fd, uint32_t bo, unsigned int wait)
> +static void check_wait(int fd, uint32_t bo, unsigned int wait, igt_stats_t *st)
>  {
>  	struct timespec ts = {};
> -	uint64_t elapsed;
>  
>  	if (wait) {
>  		hang_after(fd, wait, &ts);
> @@ -253,10 +253,34 @@ static void check_wait(int fd, uint32_t bo, unsigned int wait)
>  
>  	gem_sync(fd, bo);
>  
> -	elapsed = igt_nsec_elapsed(&ts);
> -	igt_assert_f(elapsed < 250e6,
> -		     "Wake up following reset+wedge took %.3fms\n",
> -		     elapsed*1e-6);
> +	if (st)
> +		igt_stats_push(st, igt_nsec_elapsed(&ts));

You probably want igt_nsec_elapsed() primed regardless
of wait; otherwise I see -1 being pushed, polluting the stats.

-Mika


> +}
> +
> +static void check_wait_elapsed(int fd, igt_stats_t *st)
> +{
> +	double med, max;
> +
> +	igt_info("Completed %d resets, wakeups took %.3f+-%.3fms (min:%.3fms, median:%.3fms, max:%.3fms)\n",
> +		 st->n_values,
> +		 igt_stats_get_mean(st)*1e-6,
> +		 igt_stats_get_std_deviation(st)*1e-6,
> +		 igt_stats_get_min(st)*1e-6,
> +		 igt_stats_get_median(st)*1e-6,
> +		 igt_stats_get_max(st)*1e-6);
> +
> +	if (st->n_values < 9)
> +		return; /* too few for stable median */
> +
> +	med = igt_stats_get_median(st);
> +	max = igt_stats_get_max(st);
> +	igt_assert_f(med < 250e6 && max < 1250e6,
> +		     "Wake up following reset+wedge took %.3f+-%.3fms (min:%.3fms, median:%.3fms, max:%.3fms)\n",
> +		     igt_stats_get_mean(st)*1e-6,
> +		     igt_stats_get_std_deviation(st)*1e-6,
> +		     igt_stats_get_min(st)*1e-6,
> +		     igt_stats_get_median(st)*1e-6,
> +		     igt_stats_get_max(st)*1e-6);
>  }
>  
>  static void __test_banned(int fd)
> @@ -326,7 +350,7 @@ static void test_wait(int fd, unsigned int flags, unsigned int wait)
>  
>  	hang = spin_sync(fd, 0, I915_EXEC_DEFAULT);
>  
> -	check_wait(fd, hang->handle, wait);
> +	check_wait(fd, hang->handle, wait, NULL);
>  
>  	igt_spin_batch_free(fd, hang);
>  
> @@ -401,7 +425,7 @@ static void test_inflight(int fd, unsigned int wait)
>  			igt_assert(fence[n] != -1);
>  		}
>  
> -		check_wait(fd, obj[1].handle, wait);
> +		check_wait(fd, obj[1].handle, wait, NULL);
>  
>  		for (unsigned int n = 0; n < max; n++) {
>  			igt_assert_eq(sync_fence_status(fence[n]), -EIO);
> @@ -457,7 +481,7 @@ static void test_inflight_suspend(int fd)
>  	igt_set_autoresume_delay(30);
>  	igt_system_suspend_autoresume(SUSPEND_STATE_MEM, SUSPEND_TEST_NONE);
>  
> -	check_wait(fd, obj[1].handle, 10);
> +	check_wait(fd, obj[1].handle, 10, NULL);
>  
>  	for (unsigned int n = 0; n < max; n++) {
>  		igt_assert_eq(sync_fence_status(fence[n]), -EIO);
> @@ -535,7 +559,7 @@ static void test_inflight_contexts(int fd, unsigned int wait)
>  			igt_assert(fence[n] != -1);
>  		}
>  
> -		check_wait(fd, obj[1].handle, wait);
> +		check_wait(fd, obj[1].handle, wait, NULL);
>  
>  		for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) {
>  			igt_assert_eq(sync_fence_status(fence[n]), -EIO);
> @@ -644,7 +668,7 @@ static void test_inflight_internal(int fd, unsigned int wait)
>  		nfence++;
>  	}
>  
> -	check_wait(fd, obj[1].handle, wait);
> +	check_wait(fd, obj[1].handle, wait, NULL);
>  
>  	while (nfence--) {
>  		igt_assert_eq(sync_fence_status(fences[nfence]), -EIO);
> @@ -670,8 +694,11 @@ static void reset_stress(int fd,
>  		.buffer_count = 1,
>  		.flags = engine,
>  	};
> +	igt_stats_t stats;
> +
>  	gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
>  
> +	igt_stats_init(&stats);
>  	igt_until_timeout(5) {
>  		uint32_t ctx = context_create_safe(fd);
>  		igt_spin_t *hang;
> @@ -697,7 +724,7 @@ static void reset_stress(int fd,
>  			gem_execbuf(fd, &execbuf);
>  
>  		/* Wedge after a small delay. */
> -		check_wait(fd, obj.handle, 100e3);
> +		check_wait(fd, obj.handle, 100e3, &stats);
>  
>  		/* Unwedge by forcing a reset. */
>  		igt_assert(i915_reset_control(true));
> @@ -721,6 +748,8 @@ static void reset_stress(int fd,
>  		igt_spin_batch_free(fd, hang);
>  		gem_context_destroy(fd, ctx);
>  	}
> +	check_wait_elapsed(fd, &stats);
> +	igt_stats_fini(&stats);
>  
>  	gem_close(fd, obj.handle);
>  }
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

WARNING: multiple messages have this Message-ID (diff)
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Cc: igt-dev@lists.freedesktop.org
Subject: Re: [PATCH i-g-t 1/2] i915/gem_eio: Check average reset times
Date: Fri, 15 Feb 2019 16:40:30 +0200	[thread overview]
Message-ID: <87h8d5yv1d.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <20190212205740.20368-1-chris@chris-wilson.co.uk>

Chris Wilson <chris@chris-wilson.co.uk> writes:

> As we have moved to rcu/srcu to serialise the resets, individual resets
> are subject to small variations in system grace periods. Allow for this
> by only expecting the median reset time to be within our target, thereby
> excluding noisy outliers from perturbing our results (but keep the
> maximum capped to prevent horrid failures!)
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  tests/i915/gem_eio.c | 53 ++++++++++++++++++++++++++++++++++----------
>  1 file changed, 41 insertions(+), 12 deletions(-)
>
> diff --git a/tests/i915/gem_eio.c b/tests/i915/gem_eio.c
> index 61054a07e..bd5266104 100644
> --- a/tests/i915/gem_eio.c
> +++ b/tests/i915/gem_eio.c
> @@ -42,6 +42,7 @@
>  
>  #include "igt.h"
>  #include "igt_device.h"
> +#include "igt_stats.h"
>  #include "igt_sysfs.h"
>  #include "sw_sync.h"
>  #include "i915/gem_ring.h"
> @@ -239,10 +240,9 @@ static void hang_after(int fd, unsigned int us, struct timespec *ts)
>  	igt_assert_eq(timer_settime(ctx->timer, 0, &its, NULL), 0);
>  }
>  
> -static void check_wait(int fd, uint32_t bo, unsigned int wait)
> +static void check_wait(int fd, uint32_t bo, unsigned int wait, igt_stats_t *st)
>  {
>  	struct timespec ts = {};
> -	uint64_t elapsed;
>  
>  	if (wait) {
>  		hang_after(fd, wait, &ts);
> @@ -253,10 +253,34 @@ static void check_wait(int fd, uint32_t bo, unsigned int wait)
>  
>  	gem_sync(fd, bo);
>  
> -	elapsed = igt_nsec_elapsed(&ts);
> -	igt_assert_f(elapsed < 250e6,
> -		     "Wake up following reset+wedge took %.3fms\n",
> -		     elapsed*1e-6);
> +	if (st)
> +		igt_stats_push(st, igt_nsec_elapsed(&ts));

You probably want igt_nsec_elapsed() primed regardless
of wait; otherwise I see -1 being pushed, polluting the stats.

-Mika


> +}
> +
> +static void check_wait_elapsed(int fd, igt_stats_t *st)
> +{
> +	double med, max;
> +
> +	igt_info("Completed %d resets, wakeups took %.3f+-%.3fms (min:%.3fms, median:%.3fms, max:%.3fms)\n",
> +		 st->n_values,
> +		 igt_stats_get_mean(st)*1e-6,
> +		 igt_stats_get_std_deviation(st)*1e-6,
> +		 igt_stats_get_min(st)*1e-6,
> +		 igt_stats_get_median(st)*1e-6,
> +		 igt_stats_get_max(st)*1e-6);
> +
> +	if (st->n_values < 9)
> +		return; /* too few for stable median */
> +
> +	med = igt_stats_get_median(st);
> +	max = igt_stats_get_max(st);
> +	igt_assert_f(med < 250e6 && max < 1250e6,
> +		     "Wake up following reset+wedge took %.3f+-%.3fms (min:%.3fms, median:%.3fms, max:%.3fms)\n",
> +		     igt_stats_get_mean(st)*1e-6,
> +		     igt_stats_get_std_deviation(st)*1e-6,
> +		     igt_stats_get_min(st)*1e-6,
> +		     igt_stats_get_median(st)*1e-6,
> +		     igt_stats_get_max(st)*1e-6);
>  }
>  
>  static void __test_banned(int fd)
> @@ -326,7 +350,7 @@ static void test_wait(int fd, unsigned int flags, unsigned int wait)
>  
>  	hang = spin_sync(fd, 0, I915_EXEC_DEFAULT);
>  
> -	check_wait(fd, hang->handle, wait);
> +	check_wait(fd, hang->handle, wait, NULL);
>  
>  	igt_spin_batch_free(fd, hang);
>  
> @@ -401,7 +425,7 @@ static void test_inflight(int fd, unsigned int wait)
>  			igt_assert(fence[n] != -1);
>  		}
>  
> -		check_wait(fd, obj[1].handle, wait);
> +		check_wait(fd, obj[1].handle, wait, NULL);
>  
>  		for (unsigned int n = 0; n < max; n++) {
>  			igt_assert_eq(sync_fence_status(fence[n]), -EIO);
> @@ -457,7 +481,7 @@ static void test_inflight_suspend(int fd)
>  	igt_set_autoresume_delay(30);
>  	igt_system_suspend_autoresume(SUSPEND_STATE_MEM, SUSPEND_TEST_NONE);
>  
> -	check_wait(fd, obj[1].handle, 10);
> +	check_wait(fd, obj[1].handle, 10, NULL);
>  
>  	for (unsigned int n = 0; n < max; n++) {
>  		igt_assert_eq(sync_fence_status(fence[n]), -EIO);
> @@ -535,7 +559,7 @@ static void test_inflight_contexts(int fd, unsigned int wait)
>  			igt_assert(fence[n] != -1);
>  		}
>  
> -		check_wait(fd, obj[1].handle, wait);
> +		check_wait(fd, obj[1].handle, wait, NULL);
>  
>  		for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) {
>  			igt_assert_eq(sync_fence_status(fence[n]), -EIO);
> @@ -644,7 +668,7 @@ static void test_inflight_internal(int fd, unsigned int wait)
>  		nfence++;
>  	}
>  
> -	check_wait(fd, obj[1].handle, wait);
> +	check_wait(fd, obj[1].handle, wait, NULL);
>  
>  	while (nfence--) {
>  		igt_assert_eq(sync_fence_status(fences[nfence]), -EIO);
> @@ -670,8 +694,11 @@ static void reset_stress(int fd,
>  		.buffer_count = 1,
>  		.flags = engine,
>  	};
> +	igt_stats_t stats;
> +
>  	gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
>  
> +	igt_stats_init(&stats);
>  	igt_until_timeout(5) {
>  		uint32_t ctx = context_create_safe(fd);
>  		igt_spin_t *hang;
> @@ -697,7 +724,7 @@ static void reset_stress(int fd,
>  			gem_execbuf(fd, &execbuf);
>  
>  		/* Wedge after a small delay. */
> -		check_wait(fd, obj.handle, 100e3);
> +		check_wait(fd, obj.handle, 100e3, &stats);
>  
>  		/* Unwedge by forcing a reset. */
>  		igt_assert(i915_reset_control(true));
> @@ -721,6 +748,8 @@ static void reset_stress(int fd,
>  		igt_spin_batch_free(fd, hang);
>  		gem_context_destroy(fd, ctx);
>  	}
> +	check_wait_elapsed(fd, &stats);
> +	igt_stats_fini(&stats);
>  
>  	gem_close(fd, obj.handle);
>  }
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2019-02-15 14:40 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-12 20:57 [igt-dev] [PATCH i-g-t 1/2] i915/gem_eio: Check average reset times Chris Wilson
2019-02-12 20:57 ` Chris Wilson
2019-02-12 20:57 ` [igt-dev] [PATCH i-g-t 2/2] i915/gem_eio: Allow older platforms more leniency in reset latency Chris Wilson
2019-02-12 20:57   ` Chris Wilson
2019-02-15 14:43   ` [Intel-gfx] " Mika Kuoppala
2019-02-15 14:43     ` Mika Kuoppala
2019-02-12 21:22 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2] i915/gem_eio: Check average reset times Patchwork
2019-02-12 23:12 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
2019-02-15 14:40 ` Mika Kuoppala [this message]
2019-02-15 14:40   ` [PATCH i-g-t 1/2] " Mika Kuoppala
2019-02-15 14:52   ` [igt-dev] [Intel-gfx] " Chris Wilson
2019-02-15 14:52     ` Chris Wilson
2019-02-15 15:02     ` [igt-dev] [Intel-gfx] " Mika Kuoppala via igt-dev
2019-02-15 15:02       ` Mika Kuoppala

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87h8d5yv1d.fsf@gaia.fi.intel.com \
    --to=mika.kuoppala@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=igt-dev@lists.freedesktop.org \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.