* [PATCH i-g-t] tests/perf_pmu: Test busyness reporting in face of GPU hangs
@ 2018-02-19 19:12 Tvrtko Ursulin
2018-02-19 19:21 ` [igt-dev] " Chris Wilson
0 siblings, 1 reply; 8+ messages in thread
From: Tvrtko Ursulin @ 2018-02-19 19:12 UTC (permalink / raw)
To: igt-dev; +Cc: Intel-gfx
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Verify that the reported busyness is in line with what we would expect
from a batch which causes a hang and gets kicked out from the engine.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
tests/perf_pmu.c | 41 +++++++++++++++++++++++++++++++++++------
1 file changed, 35 insertions(+), 6 deletions(-)
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 7fab73e22c2d..90b6ec4db32d 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -168,6 +168,7 @@ static unsigned int e2ring(int gem_fd, const struct intel_execution_engine2 *e)
#define TEST_TRAILING_IDLE (4)
#define TEST_RUNTIME_PM (8)
#define FLAG_LONG (16)
+#define FLAG_HANG (32)
static void end_spin(int fd, igt_spin_t *spin, unsigned int flags)
{
@@ -186,11 +187,15 @@ static void end_spin(int fd, igt_spin_t *spin, unsigned int flags)
static void
single(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
{
+ const unsigned int hang_us = 10e6;
unsigned long slept;
igt_spin_t *spin;
- uint64_t val;
+ uint64_t val[2], ts[2];
int fd;
+ if (flags & FLAG_HANG)
+ gem_quiescent_gpu(gem_fd);
+
fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
if (flags & TEST_BUSY)
@@ -198,17 +203,36 @@ single(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
else
spin = NULL;
- val = pmu_read_single(fd);
- slept = measured_usleep(batch_duration_ns / 1000);
+ val[0] = __pmu_read_single(fd, &ts[0]);
+ slept = measured_usleep(flags & FLAG_HANG ?
+ hang_us : batch_duration_ns / 1000);
if (flags & TEST_TRAILING_IDLE)
end_spin(gem_fd, spin, flags);
- val = pmu_read_single(fd) - val;
+ val[1] = pmu_read_single(fd);
end_spin(gem_fd, spin, FLAG_SYNC);
igt_spin_batch_free(gem_fd, spin);
- close(fd);
- assert_within_epsilon(val, flags & TEST_BUSY ? slept : 0.f, tolerance);
+ if ((flags & TEST_BUSY) && (flags & FLAG_HANG)) {
+ val[1] = __pmu_read_single(fd, &ts[1]);
+ close(fd);
+ igt_info("sampled with hang %.3fms / %.3fms\n",
+ (val[1] - val[0]) / 1e6, (ts[1] - ts[0]) / 1e6);
+ /* Check that some busyness was reported. */
+ igt_assert(val[1] - val[0] > 0);
+ /*
+ * But not more than some reasonable value before which we
+ * expected the spinner to be kicked out.
+ */
+ igt_assert((val[1] - val[0]) / 1e3 < (double)hang_us * 0.75);
+ __assert_within_epsilon(val[1] - val[0], hang_us * 1e3,
+ 0.02f, 10.0f);
+ } else {
+ close(fd);
+ assert_within_epsilon(val[1] - val[0],
+ flags & TEST_BUSY ?
+ slept : 0.f, tolerance);
+ }
gem_quiescent_gpu(gem_fd);
}
@@ -1695,6 +1719,11 @@ igt_main
pct[i], e->name)
accuracy(fd, e, pct[i]);
}
+
+ igt_subtest_f("busy-hang-%s", e->name) {
+ single(fd, e, TEST_BUSY | FLAG_HANG);
+ single(fd, e, TEST_BUSY | FLAG_HANG);
+ }
}
/**
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [igt-dev] [PATCH i-g-t] tests/perf_pmu: Test busyness reporting in face of GPU hangs
2018-02-19 19:12 [PATCH i-g-t] tests/perf_pmu: Test busyness reporting in face of GPU hangs Tvrtko Ursulin
@ 2018-02-19 19:21 ` Chris Wilson
2018-02-19 19:26 ` Tvrtko Ursulin
2018-02-28 17:15 ` [PATCH i-g-t v2] " Tvrtko Ursulin
0 siblings, 2 replies; 8+ messages in thread
From: Chris Wilson @ 2018-02-19 19:21 UTC (permalink / raw)
To: Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx
Quoting Tvrtko Ursulin (2018-02-19 19:12:51)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Verify that the reported busyness is in line with what would we expect
> from a batch which causes a hang and gets kicked out from the engine.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
> tests/perf_pmu.c | 41 +++++++++++++++++++++++++++++++++++------
> 1 file changed, 35 insertions(+), 6 deletions(-)
>
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> index 7fab73e22c2d..90b6ec4db32d 100644
> --- a/tests/perf_pmu.c
> +++ b/tests/perf_pmu.c
> @@ -168,6 +168,7 @@ static unsigned int e2ring(int gem_fd, const struct intel_execution_engine2 *e)
> #define TEST_TRAILING_IDLE (4)
> #define TEST_RUNTIME_PM (8)
> #define FLAG_LONG (16)
> +#define FLAG_HANG (32)
>
> static void end_spin(int fd, igt_spin_t *spin, unsigned int flags)
> {
> @@ -186,11 +187,15 @@ static void end_spin(int fd, igt_spin_t *spin, unsigned int flags)
> static void
> single(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
> {
> + const unsigned int hang_us = 10e6;
> unsigned long slept;
> igt_spin_t *spin;
> - uint64_t val;
> + uint64_t val[2], ts[2];
> int fd;
>
> + if (flags & FLAG_HANG)
> + gem_quiescent_gpu(gem_fd);
> +
> fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
>
> if (flags & TEST_BUSY)
> @@ -198,17 +203,36 @@ single(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
> else
> spin = NULL;
>
> - val = pmu_read_single(fd);
> - slept = measured_usleep(batch_duration_ns / 1000);
> + val[0] = __pmu_read_single(fd, &ts[0]);
> + slept = measured_usleep(flags & FLAG_HANG ?
> + hang_us : batch_duration_ns / 1000);
> if (flags & TEST_TRAILING_IDLE)
> end_spin(gem_fd, spin, flags);
> - val = pmu_read_single(fd) - val;
> + val[1] = pmu_read_single(fd);
>
> end_spin(gem_fd, spin, FLAG_SYNC);
> igt_spin_batch_free(gem_fd, spin);
> - close(fd);
>
> - assert_within_epsilon(val, flags & TEST_BUSY ? slept : 0.f, tolerance);
> + if ((flags & TEST_BUSY) && (flags & FLAG_HANG)) {
> + val[1] = __pmu_read_single(fd, &ts[1]);
> + close(fd);
> + igt_info("sampled with hang %.3fms / %.3fms\n",
> + (val[1] - val[0]) / 1e6, (ts[1] - ts[0]) / 1e6);
> + /* Check that some busyness was reported. */
> + igt_assert(val[1] - val[0] > 0);
> + /*
> + * But not more than some reasonable value before which we
> + * expected the spinner to be kicked out.
> + */
So 120s? And even that carries internal knowledge from across the ages.
I don't think this is a sensible test. What would be reasonable is
something like
spinner()
val[0] = pmu()
sleep()
igt_force_gpu_reset()
val[1] = pmu();
d_busy = val[1] - val[0]
sleep()
val[2] = pmu()
d_idle = val[2] - val[1];
Then d_busy should be d_ts, and d_idle should be 0. i.e. the
igt_force_gpu_reset() is just an indirect igt_spin_batch_end().
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [igt-dev] [PATCH i-g-t] tests/perf_pmu: Test busyness reporting in face of GPU hangs
2018-02-19 19:21 ` [igt-dev] " Chris Wilson
@ 2018-02-19 19:26 ` Tvrtko Ursulin
2018-02-28 17:15 ` [PATCH i-g-t v2] " Tvrtko Ursulin
1 sibling, 0 replies; 8+ messages in thread
From: Tvrtko Ursulin @ 2018-02-19 19:26 UTC (permalink / raw)
To: Chris Wilson, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx
On 19/02/2018 19:21, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-02-19 19:12:51)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Verify that the reported busyness is in line with what would we expect
>> from a batch which causes a hang and gets kicked out from the engine.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>> tests/perf_pmu.c | 41 +++++++++++++++++++++++++++++++++++------
>> 1 file changed, 35 insertions(+), 6 deletions(-)
>>
>> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
>> index 7fab73e22c2d..90b6ec4db32d 100644
>> --- a/tests/perf_pmu.c
>> +++ b/tests/perf_pmu.c
>> @@ -168,6 +168,7 @@ static unsigned int e2ring(int gem_fd, const struct intel_execution_engine2 *e)
>> #define TEST_TRAILING_IDLE (4)
>> #define TEST_RUNTIME_PM (8)
>> #define FLAG_LONG (16)
>> +#define FLAG_HANG (32)
>>
>> static void end_spin(int fd, igt_spin_t *spin, unsigned int flags)
>> {
>> @@ -186,11 +187,15 @@ static void end_spin(int fd, igt_spin_t *spin, unsigned int flags)
>> static void
>> single(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
>> {
>> + const unsigned int hang_us = 10e6;
>> unsigned long slept;
>> igt_spin_t *spin;
>> - uint64_t val;
>> + uint64_t val[2], ts[2];
>> int fd;
>>
>> + if (flags & FLAG_HANG)
>> + gem_quiescent_gpu(gem_fd);
>> +
>> fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
>>
>> if (flags & TEST_BUSY)
>> @@ -198,17 +203,36 @@ single(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
>> else
>> spin = NULL;
>>
>> - val = pmu_read_single(fd);
>> - slept = measured_usleep(batch_duration_ns / 1000);
>> + val[0] = __pmu_read_single(fd, &ts[0]);
>> + slept = measured_usleep(flags & FLAG_HANG ?
>> + hang_us : batch_duration_ns / 1000);
>> if (flags & TEST_TRAILING_IDLE)
>> end_spin(gem_fd, spin, flags);
>> - val = pmu_read_single(fd) - val;
>> + val[1] = pmu_read_single(fd);
>>
>> end_spin(gem_fd, spin, FLAG_SYNC);
>> igt_spin_batch_free(gem_fd, spin);
>> - close(fd);
>>
>> - assert_within_epsilon(val, flags & TEST_BUSY ? slept : 0.f, tolerance);
>> + if ((flags & TEST_BUSY) && (flags & FLAG_HANG)) {
>> + val[1] = __pmu_read_single(fd, &ts[1]);
>> + close(fd);
>> + igt_info("sampled with hang %.3fms / %.3fms\n",
>> + (val[1] - val[0]) / 1e6, (ts[1] - ts[0]) / 1e6);
>> + /* Check that some busyness was reported. */
>> + igt_assert(val[1] - val[0] > 0);
>> + /*
>> + * But not more than some reasonable value before which we
>> + * expected the spinner to be kicked out.
>> + */
>
> So 120s? And even that carries internal knowledge from across the ages.
>
> I don't think this is a sensible test. What would be reasonable is
> something like
>
> spinner()
> val[0] = pmu()
> sleep()
> igt_force_gpu_reset()
> val[1] = pmu();
> d_busy = val[1] - val[0]
> sleep()
> val[2] = pmu()
> d_idle = val[2] - val[1];
>
> Then d_busy should be d_ts, and d_idle should be 0. i.e. the
> igt_force_gpu_reset() is just an indirect igt_spin_batch_end().
Yeah I am not claiming the test is great. I threw it together quickly
when I suspected something is going bad. Just want to get some results
overnight so I can despair tomorrow.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH i-g-t v2] tests/perf_pmu: Test busyness reporting in face of GPU hangs
2018-02-19 19:21 ` [igt-dev] " Chris Wilson
2018-02-19 19:26 ` Tvrtko Ursulin
@ 2018-02-28 17:15 ` Tvrtko Ursulin
2018-03-01 8:08 ` Chris Wilson
1 sibling, 1 reply; 8+ messages in thread
From: Tvrtko Ursulin @ 2018-02-28 17:15 UTC (permalink / raw)
To: igt-dev; +Cc: Intel-gfx
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Verify that the reported busyness is in line with what we would expect
from a batch which causes a hang and gets kicked out from the engine.
v2: Change to explicit igt_force_gpu_reset instead of guessing when a spin
batch will hang. (Chris Wilson)
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
tests/perf_pmu.c | 24 ++++++++++++++++++++++--
1 file changed, 22 insertions(+), 2 deletions(-)
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 3bbb18d2f216..f5c70776e2cf 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -168,6 +168,7 @@ static unsigned int e2ring(int gem_fd, const struct intel_execution_engine2 *e)
#define TEST_TRAILING_IDLE (4)
#define TEST_RUNTIME_PM (8)
#define FLAG_LONG (16)
+#define FLAG_HANG (32)
static void end_spin(int fd, igt_spin_t *spin, unsigned int flags)
{
@@ -204,11 +205,27 @@ single(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
end_spin(gem_fd, spin, flags);
val = pmu_read_single(fd) - val;
- end_spin(gem_fd, spin, FLAG_SYNC);
+ if (flags & FLAG_HANG)
+ igt_force_gpu_reset(gem_fd);
+ else
+ end_spin(gem_fd, spin, FLAG_SYNC);
+
+ assert_within_epsilon(val, flags & TEST_BUSY ? slept : 0.f, tolerance);
+
+ /* Check for idle after hang. */
+ if (flags & FLAG_HANG) {
+ /* Sleep for a bit for reset unwind to settle. */
+ sleep(1);
+ val = pmu_read_single(fd);
+ slept = measured_usleep(batch_duration_ns / 1000);
+ val = pmu_read_single(fd) - val;
+
+ assert_within_epsilon(val, 0, tolerance);
+ }
+
igt_spin_batch_free(gem_fd, spin);
close(fd);
- assert_within_epsilon(val, flags & TEST_BUSY ? slept : 0.f, tolerance);
gem_quiescent_gpu(gem_fd);
}
@@ -1690,6 +1707,9 @@ igt_main
pct[i], e->name)
accuracy(fd, e, pct[i]);
}
+
+ igt_subtest_f("busy-hang-%s", e->name)
+ single(fd, e, TEST_BUSY | FLAG_HANG);
}
/**
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH i-g-t v2] tests/perf_pmu: Test busyness reporting in face of GPU hangs
2018-02-28 17:15 ` [PATCH i-g-t v2] " Tvrtko Ursulin
@ 2018-03-01 8:08 ` Chris Wilson
2018-03-01 9:21 ` Tvrtko Ursulin
0 siblings, 1 reply; 8+ messages in thread
From: Chris Wilson @ 2018-03-01 8:08 UTC (permalink / raw)
To: Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx
Quoting Tvrtko Ursulin (2018-02-28 17:15:19)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Verify that the reported busyness is in line with what would we expect
> from a batch which causes a hang and gets kicked out from the engine.
>
> v2: Change to explicit igt_force_gpu_reset instead of guessing when a spin
> batch will hang. (Chris Wilson)
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
It's nice and quick, yes. However, sometimes the opposite is true and you
have to wait for the batch you want to start before pulling the trigger.
I'd put a usleep(100) in there /* Wait for batch to execute */ and we
should put the wait-for-execution ability in igt_spin_t. Unfortunately
that requires MI_STORE_DWORD_IMM (or more creativity) limiting its
availability.
With the sleep issue addressed (commented upon if nothing else),
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH i-g-t v2] tests/perf_pmu: Test busyness reporting in face of GPU hangs
2018-03-01 8:08 ` Chris Wilson
@ 2018-03-01 9:21 ` Tvrtko Ursulin
2018-03-01 9:27 ` Chris Wilson
0 siblings, 1 reply; 8+ messages in thread
From: Tvrtko Ursulin @ 2018-03-01 9:21 UTC (permalink / raw)
To: Chris Wilson, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx
On 01/03/2018 08:08, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-02-28 17:15:19)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Verify that the reported busyness is in line with what would we expect
>> from a batch which causes a hang and gets kicked out from the engine.
>>
>> v2: Change to explicit igt_force_gpu_reset instead of guessing when a spin
>> batch will hang. (Chris Wilson)
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> It's nice and quick, yes. However, sometime the opposite is true and you
> have to wait for the batch you want to start before pulling the trigger.
>
> I'd put a usleep(100) in there /* Wait for batch to execute */ and we
Hm, but reset is triggered after the first sleep (which checks for 100%
busy). So even the non-hanging test flavour could be affected if the delay
before execution is so long. So by this logic this usleep(100) (or more
for small core CI) should then go to many tests. Only difference in the
hang flavour is that if it completely failed to run until after the
reset, then the idle assert would fail. So maybe I should just add an
assert that the batch is idle before sampling pmu after reset?
Regards,
Tvrtko
> should put the wait-for-execution ability in igt_spin_t. Unfortunately
> that requires MI_STORE_DWORD_IMM (or more creativity) limiting it's
> availability.
>
> With the sleep issue addressed (commented upon if nothing else),
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> -Chris
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH i-g-t v2] tests/perf_pmu: Test busyness reporting in face of GPU hangs
2018-03-01 9:21 ` Tvrtko Ursulin
@ 2018-03-01 9:27 ` Chris Wilson
2018-03-01 9:38 ` [PATCH i-g-t v3] " Tvrtko Ursulin
0 siblings, 1 reply; 8+ messages in thread
From: Chris Wilson @ 2018-03-01 9:27 UTC (permalink / raw)
To: Tvrtko Ursulin, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx
Quoting Tvrtko Ursulin (2018-03-01 09:21:52)
>
> On 01/03/2018 08:08, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-02-28 17:15:19)
> >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >> Verify that the reported busyness is in line with what would we expect
> >> from a batch which causes a hang and gets kicked out from the engine.
> >>
> >> v2: Change to explicit igt_force_gpu_reset instead of guessing when a spin
> >> batch will hang. (Chris Wilson)
> >>
> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >
> > It's nice and quick, yes. However, sometime the opposite is true and you
> > have to wait for the batch you want to start before pulling the trigger.
> >
> > I'd put a usleep(100) in there /* Wait for batch to execute */ and we
>
> Hm, but reset is triggered after the first sleep (which checks for 100%
> busy). So even the non-hanging test flavour could be affect if the delay
> before execution is so long. So by this logic this usleep(100) (or more
> for small core CI) should then go to many tests. Only difference in the
> hang flavour is that if it completely failed to run until after the
> reset, then the idle assert would fail. So maybe I should just add an
> assert that the batch is idle before sampling pmu after reset?
Sneaky. Yes, that will work, just add a comment for the case where it
may fail (reset before batch execution).
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH i-g-t v3] tests/perf_pmu: Test busyness reporting in face of GPU hangs
2018-03-01 9:27 ` Chris Wilson
@ 2018-03-01 9:38 ` Tvrtko Ursulin
0 siblings, 0 replies; 8+ messages in thread
From: Tvrtko Ursulin @ 2018-03-01 9:38 UTC (permalink / raw)
To: igt-dev; +Cc: Intel-gfx
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Verify that the reported busyness is in line with what we would expect
from a batch which causes a hang and gets kicked out from the engine.
v2: Change to explicit igt_force_gpu_reset instead of guessing when a spin
batch will hang. (Chris Wilson)
v3: Assert and comment test expectations. (Chris Wilson)
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
tests/perf_pmu.c | 32 ++++++++++++++++++++++++++++++--
1 file changed, 30 insertions(+), 2 deletions(-)
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 3bbb18d2f216..4713c98c5d22 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -168,6 +168,7 @@ static unsigned int e2ring(int gem_fd, const struct intel_execution_engine2 *e)
#define TEST_TRAILING_IDLE (4)
#define TEST_RUNTIME_PM (8)
#define FLAG_LONG (16)
+#define FLAG_HANG (32)
static void end_spin(int fd, igt_spin_t *spin, unsigned int flags)
{
@@ -204,11 +205,35 @@ single(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
end_spin(gem_fd, spin, flags);
val = pmu_read_single(fd) - val;
- end_spin(gem_fd, spin, FLAG_SYNC);
+ if (flags & FLAG_HANG)
+ igt_force_gpu_reset(gem_fd);
+ else
+ end_spin(gem_fd, spin, FLAG_SYNC);
+
+ assert_within_epsilon(val, flags & TEST_BUSY ? slept : 0.f, tolerance);
+
+ /* Check for idle after hang. */
+ if (flags & FLAG_HANG) {
+ /* Sleep for a bit for reset unwind to settle. */
+ usleep(500e3);
+ /*
+ * Ensure batch was executing before reset, meaning it must be
+ * idle by now. Unless it did not even manage to start before we
+ * triggered the reset, in which case the idleness check below
+ * might fail. The latter is very unlikely since there are two
+ * sleeps during which it had an opportunity to start.
+ */
+ igt_assert(!gem_bo_busy(gem_fd, spin->handle));
+ val = pmu_read_single(fd);
+ slept = measured_usleep(batch_duration_ns / 1000);
+ val = pmu_read_single(fd) - val;
+
+ assert_within_epsilon(val, 0, tolerance);
+ }
+
igt_spin_batch_free(gem_fd, spin);
close(fd);
- assert_within_epsilon(val, flags & TEST_BUSY ? slept : 0.f, tolerance);
gem_quiescent_gpu(gem_fd);
}
@@ -1690,6 +1715,9 @@ igt_main
pct[i], e->name)
accuracy(fd, e, pct[i]);
}
+
+ igt_subtest_f("busy-hang-%s", e->name)
+ single(fd, e, TEST_BUSY | FLAG_HANG);
}
/**
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2018-03-01 9:38 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-02-19 19:12 [PATCH i-g-t] tests/perf_pmu: Test busyness reporting in face of GPU hangs Tvrtko Ursulin
2018-02-19 19:21 ` [igt-dev] " Chris Wilson
2018-02-19 19:26 ` Tvrtko Ursulin
2018-02-28 17:15 ` [PATCH i-g-t v2] " Tvrtko Ursulin
2018-03-01 8:08 ` Chris Wilson
2018-03-01 9:21 ` Tvrtko Ursulin
2018-03-01 9:27 ` Chris Wilson
2018-03-01 9:38 ` [PATCH i-g-t v3] " Tvrtko Ursulin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox