* [PATCH i-g-t] tests/perf_pmu: Verify engine busyness accuracy
@ 2018-02-15 11:53 Tvrtko Ursulin
2018-02-15 12:43 ` Chris Wilson
0 siblings, 1 reply; 10+ messages in thread
From: Tvrtko Ursulin @ 2018-02-15 11:53 UTC (permalink / raw)
To: igt-dev; +Cc: Intel-gfx
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
A subtest to verify that the engine busyness is reported with expected
accuracy on platforms where the feature is available.
We test three patterns: 2%, 50% and 98% load per engine.
v2:
* Use spin batch instead of nop calibration.
* Various tweaks.
v3:
* Change loops to be time based.
* Use __igt_spin_batch_new inside timing sensitive loops.
* Fixed PWM sleep handling.
v4:
* Use restarting spin batch.
* Calibrate more carefully by looking at the real PWM loop.
v5:
* Made standalone.
* Better info messages.
* Tweak sleep compensation.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
tests/perf_pmu.c | 192 +++++++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 174 insertions(+), 18 deletions(-)
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index a7501ca5f7a4..fa9b54793a52 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -35,6 +35,7 @@
#include <dirent.h>
#include <time.h>
#include <poll.h>
+#include <sched.h>
#include "igt.h"
#include "igt_core.h"
@@ -385,6 +386,22 @@ busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
gem_quiescent_gpu(gem_fd);
}
+static void
+__submit_spin_batch(int gem_fd, igt_spin_t *spin,
+ const struct intel_execution_engine2 *e)
+{
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = spin->handle
+ };
+ struct drm_i915_gem_execbuffer2 eb = {
+ .buffer_count = 1,
+ .buffers_ptr = to_user_pointer(&obj),
+ .flags = e2ring(gem_fd, e),
+ };
+
+ gem_execbuf(gem_fd, &eb);
+}
+
static void
most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
const unsigned int num_engines, unsigned int flags)
@@ -405,15 +422,7 @@ most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
if (e == e_) {
idle_idx = i;
} else if (spin) {
- struct drm_i915_gem_exec_object2 obj = {
- .handle = spin->handle
- };
- struct drm_i915_gem_execbuffer2 eb = {
- .buffer_count = 1,
- .buffers_ptr = to_user_pointer(&obj),
- .flags = e2ring(gem_fd, e_),
- };
- gem_execbuf(gem_fd, &eb);
+ __submit_spin_batch(gem_fd, spin, e_);
} else {
spin = igt_spin_batch_new(gem_fd, 0,
e2ring(gem_fd, e_), 0);
@@ -469,15 +478,7 @@ all_busy_check_all(int gem_fd, const unsigned int num_engines,
continue;
if (spin) {
- struct drm_i915_gem_exec_object2 obj = {
- .handle = spin->handle
- };
- struct drm_i915_gem_execbuffer2 eb = {
- .buffer_count = 1,
- .buffers_ptr = to_user_pointer(&obj),
- .flags = e2ring(gem_fd, e),
- };
- gem_execbuf(gem_fd, &eb);
+ __submit_spin_batch(gem_fd, spin, e);
} else {
spin = igt_spin_batch_new(gem_fd, 0,
e2ring(gem_fd, e), 0);
@@ -1390,6 +1391,150 @@ test_enable_race(int gem_fd, const struct intel_execution_engine2 *e)
gem_quiescent_gpu(gem_fd);
}
+static double __error(double val, double ref)
+{
+ igt_assert(ref != 0.0);
+ return (100.0 * val / ref) - 100.0;
+}
+
+static void __rearm_spin_batch(igt_spin_t *spin)
+{
+ const uint32_t mi_arb_chk = 0x5 << 23;
+
+ *spin->batch = mi_arb_chk;
+ __sync_synchronize();
+}
+
+#define div_round_up(a, b) (((a) + (b) - 1) / (b))
+
+static void
+accuracy(int gem_fd, const struct intel_execution_engine2 *e,
+ unsigned long target_busy_pct)
+{
+ const unsigned int min_test_loops = 7;
+ const unsigned long min_test_us = 1e6;
+ unsigned long busy_us = 2500;
+ unsigned long idle_us = 100 * (busy_us - target_busy_pct *
+ busy_us / 100) / target_busy_pct;
+ unsigned long pwm_calibration_us;
+ unsigned long test_us;
+ double busy_r;
+ uint64_t val[2];
+ uint64_t ts[2];
+ int fd;
+
+ /* Sampling platforms cannot reach the high accuracy criteria. */
+ igt_require(intel_gen(intel_get_drm_devid(gem_fd)) >= 8);
+
+ while (idle_us < 2500) {
+ busy_us *= 2;
+ idle_us *= 2;
+ }
+
+ pwm_calibration_us = min_test_loops * (busy_us + idle_us);
+ while (pwm_calibration_us < min_test_us)
+ pwm_calibration_us += busy_us + idle_us;
+ test_us = min_test_loops * (idle_us + busy_us);
+ while (test_us < min_test_us)
+ test_us += busy_us + idle_us;
+
+ igt_info("calibration=%luus, test=%luus; busy=%luus, idle=%luus\n",
+ pwm_calibration_us, test_us, busy_us, idle_us);
+
+ assert_within_epsilon((double)busy_us / (busy_us + idle_us),
+ (double)target_busy_pct / 100.0, tolerance);
+
+ /* Emit PWM pattern on the engine from a child. */
+ igt_fork(child, 1) {
+ struct sched_param rt = { .sched_priority = 99 };
+ const unsigned long timeout[] = { pwm_calibration_us * 1000,
+ test_us * 2 * 1000 };
+ unsigned long sleep_busy = busy_us;
+ unsigned long sleep_idle = idle_us;
+ igt_spin_t *spin;
+
+ /* We need the best sleep accuracy we can get. */
+ igt_require(sched_setscheduler(0,
+ SCHED_FIFO | SCHED_RESET_ON_FORK,
+ &rt) == 0);
+
+ /* Allocate our spin batch and idle it. */
+ spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+ igt_spin_batch_end(spin);
+ gem_sync(gem_fd, spin->handle);
+
+ /* 1st pass is calibration, second pass is the test. */
+ for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
+ unsigned long busy_ns = 0, idle_ns = 0;
+ struct timespec test_start = { };
+ unsigned long loops = 0;
+ double err_busy, err_idle;
+
+ igt_nsec_elapsed(&test_start);
+ do {
+ struct timespec t_busy = { };
+
+ igt_nsec_elapsed(&t_busy);
+
+ /* Restart the spinbatch. */
+ __rearm_spin_batch(spin);
+ __submit_spin_batch(gem_fd, spin, e);
+ measured_usleep(sleep_busy);
+ igt_spin_batch_end(spin);
+ gem_sync(gem_fd, spin->handle);
+
+ busy_ns += igt_nsec_elapsed(&t_busy);
+
+ idle_ns += measured_usleep(sleep_idle);
+
+ loops++;
+ } while (igt_nsec_elapsed(&test_start) < timeout[pass]);
+
+ busy_ns = div_round_up(busy_ns, loops);
+ idle_ns = div_round_up(idle_ns, loops);
+
+ err_busy = __error(busy_ns / 1000, busy_us);
+ err_idle = __error(idle_ns / 1000, idle_us);
+
+ igt_info("%u: busy %lu/%lu %.2f%%, idle %lu/%lu %.2f%%\n",
+ pass,
+ busy_ns / 1000, busy_us, err_busy,
+ idle_ns / 1000, idle_us, err_idle);
+
+ if (pass == 0) {
+ sleep_busy = (double)busy_us -
+ (double)busy_us * err_busy / 100.0;
+ sleep_idle = (double)idle_us -
+ (double)idle_us * err_idle / 100.0;
+ igt_info("calibrated sleeps: busy=%lu, idle=%lu\n",
+ sleep_busy, sleep_idle);
+ }
+ }
+
+ igt_spin_batch_free(gem_fd, spin);
+ }
+
+ /* Let the child run. */
+ usleep(pwm_calibration_us * 2);
+
+ /* Collect engine busyness for an interesting part of child runtime. */
+ fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
+ val[0] = __pmu_read_single(fd, &ts[0]);
+ usleep(test_us / 2);
+ val[1] = __pmu_read_single(fd, &ts[1]);
+ close(fd);
+
+ igt_waitchildren();
+
+ busy_r = (double)(val[1] - val[0]) / (ts[1] - ts[0]);
+
+ igt_info("error=%.2f%% (%.2f%% vs %lu%%)\n",
+ __error(busy_r, target_busy_pct / 100.0),
+ busy_r * 100.0, target_busy_pct);
+
+ assert_within_epsilon(busy_r, (double)target_busy_pct / 100.0, 0.15);
+}
+
igt_main
{
const unsigned int num_other_metrics =
@@ -1418,6 +1563,8 @@ igt_main
invalid_init();
for_each_engine_class_instance(fd, e) {
+ const unsigned int pct[] = { 2, 50, 98 };
+
/**
* Test that a single engine metric can be initialized or it
* is correctly rejected.
@@ -1524,6 +1671,15 @@ igt_main
*/
igt_subtest_f("enable-race-%s", e->name)
test_enable_race(fd, e);
+
+ /**
+ * Check engine busyness accuracy is as expected.
+ */
+ for (i = 0; i < ARRAY_SIZE(pct); i++) {
+ igt_subtest_f("busy-accuracy-%u-%s",
+ pct[i], e->name)
+ accuracy(fd, e, pct[i]);
+ }
}
/**
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH i-g-t] tests/perf_pmu: Verify engine busyness accuracy
2018-02-15 11:53 [PATCH i-g-t] tests/perf_pmu: Verify engine busyness accuracy Tvrtko Ursulin
@ 2018-02-15 12:43 ` Chris Wilson
2018-02-15 15:34 ` [PATCH i-g-t v6] " Tvrtko Ursulin
0 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2018-02-15 12:43 UTC (permalink / raw)
To: Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx
Quoting Tvrtko Ursulin (2018-02-15 11:53:44)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> A subtest to verify that the engine busyness is reported with expected
> accuracy on platforms where the feature is available.
>
> We test three patterns: 2%, 50% and 98% load per engine.
>
> v2:
> * Use spin batch instead of nop calibration.
> * Various tweaks.
>
> v3:
> * Change loops to be time based.
> * Use __igt_spin_batch_new inside timing sensitive loops.
> * Fixed PWM sleep handling.
>
> v4:
> * Use restarting spin batch.
> * Calibrate more carefully by looking at the real PWM loop.
>
> v5:
> * Made standalone.
> * Better info messages.
> * Tweak sleep compensation.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
> tests/perf_pmu.c | 192 +++++++++++++++++++++++++++++++++++++++++++++++++------
> 1 file changed, 174 insertions(+), 18 deletions(-)
>
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> index a7501ca5f7a4..fa9b54793a52 100644
> --- a/tests/perf_pmu.c
> +++ b/tests/perf_pmu.c
> @@ -35,6 +35,7 @@
> #include <dirent.h>
> #include <time.h>
> #include <poll.h>
> +#include <sched.h>
>
> #include "igt.h"
> #include "igt_core.h"
> @@ -385,6 +386,22 @@ busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
> gem_quiescent_gpu(gem_fd);
> }
>
> +static void
> +__submit_spin_batch(int gem_fd, igt_spin_t *spin,
> + const struct intel_execution_engine2 *e)
> +{
> + struct drm_i915_gem_exec_object2 obj = {
> + .handle = spin->handle
> + };
> + struct drm_i915_gem_execbuffer2 eb = {
> + .buffer_count = 1,
> + .buffers_ptr = to_user_pointer(&obj),
> + .flags = e2ring(gem_fd, e),
> + };
> +
> + gem_execbuf(gem_fd, &eb);
> +}
> +
> static void
> most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
> const unsigned int num_engines, unsigned int flags)
> @@ -405,15 +422,7 @@ most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
> if (e == e_) {
> idle_idx = i;
> } else if (spin) {
> - struct drm_i915_gem_exec_object2 obj = {
> - .handle = spin->handle
> - };
> - struct drm_i915_gem_execbuffer2 eb = {
> - .buffer_count = 1,
> - .buffers_ptr = to_user_pointer(&obj),
> - .flags = e2ring(gem_fd, e_),
> - };
> - gem_execbuf(gem_fd, &eb);
> + __submit_spin_batch(gem_fd, spin, e_);
> } else {
> spin = igt_spin_batch_new(gem_fd, 0,
> e2ring(gem_fd, e_), 0);
> @@ -469,15 +478,7 @@ all_busy_check_all(int gem_fd, const unsigned int num_engines,
> continue;
>
> if (spin) {
> - struct drm_i915_gem_exec_object2 obj = {
> - .handle = spin->handle
> - };
> - struct drm_i915_gem_execbuffer2 eb = {
> - .buffer_count = 1,
> - .buffers_ptr = to_user_pointer(&obj),
> - .flags = e2ring(gem_fd, e),
> - };
> - gem_execbuf(gem_fd, &eb);
> + __submit_spin_batch(gem_fd, spin, e);
> } else {
> spin = igt_spin_batch_new(gem_fd, 0,
> e2ring(gem_fd, e), 0);
> @@ -1390,6 +1391,150 @@ test_enable_race(int gem_fd, const struct intel_execution_engine2 *e)
> gem_quiescent_gpu(gem_fd);
> }
>
> +static double __error(double val, double ref)
> +{
> + igt_assert(ref != 0.0);
igt_assert(ref > 1e-5 /* smallval */) ?
Pretty sure a negative ref is also cause for confusion :)
> + return (100.0 * val / ref) - 100.0;
> +}
> +
> +static void __rearm_spin_batch(igt_spin_t *spin)
> +{
> + const uint32_t mi_arb_chk = 0x5 << 23;
> +
> + *spin->batch = mi_arb_chk;
> + __sync_synchronize();
> +}
> +
> +#define div_round_up(a, b) (((a) + (b) - 1) / (b))
> +
> +static void
> +accuracy(int gem_fd, const struct intel_execution_engine2 *e,
> + unsigned long target_busy_pct)
> +{
> + const unsigned int min_test_loops = 7;
> + const unsigned long min_test_us = 1e6;
> + unsigned long busy_us = 2500;
> + unsigned long idle_us = 100 * (busy_us - target_busy_pct *
> + busy_us / 100) / target_busy_pct;
> + unsigned long pwm_calibration_us;
> + unsigned long test_us;
> + double busy_r;
> + uint64_t val[2];
> + uint64_t ts[2];
> + int fd;
> +
> + /* Sampling platforms cannot reach the high accuracy criteria. */
> + igt_require(intel_gen(intel_get_drm_devid(gem_fd)) >= 8);
igt_require(gem_has_execlists(gem_fd));
> +
> + while (idle_us < 2500) {
> + busy_us *= 2;
> + idle_us *= 2;
> + }
> +
> + pwm_calibration_us = min_test_loops * (busy_us + idle_us);
> + while (pwm_calibration_us < min_test_us)
> + pwm_calibration_us += busy_us + idle_us;
> + test_us = min_test_loops * (idle_us + busy_us);
> + while (test_us < min_test_us)
> + test_us += busy_us + idle_us;
> +
> + igt_info("calibration=%luus, test=%luus; busy=%luus, idle=%luus\n",
> + pwm_calibration_us, test_us, busy_us, idle_us);
Would also be nice to show the adjusted %%.
> + assert_within_epsilon((double)busy_us / (busy_us + idle_us),
> + (double)target_busy_pct / 100.0, tolerance);
> +
> + /* Emit PWM pattern on the engine from a child. */
> + igt_fork(child, 1) {
> + struct sched_param rt = { .sched_priority = 99 };
> + const unsigned long timeout[] = { pwm_calibration_us * 1000,
> + test_us * 2 * 1000 };
> + unsigned long sleep_busy = busy_us;
> + unsigned long sleep_idle = idle_us;
> + igt_spin_t *spin;
> +
> + /* We need the best sleep accuracy we can get. */
> + igt_require(sched_setscheduler(0,
> + SCHED_FIFO | SCHED_RESET_ON_FORK,
> + &rt) == 0);
Can't use igt_require() or igt_assert() from children. So just igt_warn
if not applied.
Just SCHED_FIFO is enough as the child doesn't/shouldn't fork.
> +
> + /* Allocate our spin batch and idle it. */
> + spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
> + igt_spin_batch_end(spin);
> + gem_sync(gem_fd, spin->handle);
> +
> + /* 1st pass is calibration, second pass is the test. */
> + for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
> + unsigned long busy_ns = 0, idle_ns = 0;
> + struct timespec test_start = { };
> + unsigned long loops = 0;
> + double err_busy, err_idle;
> +
> + igt_nsec_elapsed(&test_start);
> + do {
> + struct timespec t_busy = { };
> +
> + igt_nsec_elapsed(&t_busy);
> +
> + /* Restart the spinbatch. */
> + __rearm_spin_batch(spin);
> + __submit_spin_batch(gem_fd, spin, e);
> + measured_usleep(sleep_busy);
> + igt_spin_batch_end(spin);
> + gem_sync(gem_fd, spin->handle);
> +
> + busy_ns += igt_nsec_elapsed(&t_busy);
> +
> + idle_ns += measured_usleep(sleep_idle);
> +
> + loops++;
> + } while (igt_nsec_elapsed(&test_start) < timeout[pass]);
> +
> + busy_ns = div_round_up(busy_ns, loops);
> + idle_ns = div_round_up(idle_ns, loops);
> +
> + err_busy = __error(busy_ns / 1000, busy_us);
> + err_idle = __error(idle_ns / 1000, idle_us);
> +
> + igt_info("%u: busy %lu/%lu %.2f%%, idle %lu/%lu %.2f%%\n",
> + pass,
> + busy_ns / 1000, busy_us, err_busy,
> + idle_ns / 1000, idle_us, err_idle);
Ok, makes sense.
> +
> + if (pass == 0) {
> + sleep_busy = (double)busy_us -
> + (double)busy_us * err_busy / 100.0;
> + sleep_idle = (double)idle_us -
> + (double)idle_us * err_idle / 100.0;
> + igt_info("calibrated sleeps: busy=%lu, idle=%lu\n",
> + sleep_busy, sleep_idle);
> + }
> + }
> +
> + igt_spin_batch_free(gem_fd, spin);
> + }
> +
> + /* Let the child run. */
> + usleep(pwm_calibration_us * 2);
> +
> + /* Collect engine busyness for an interesting part of child runtime. */
> + fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
> + val[0] = __pmu_read_single(fd, &ts[0]);
> + usleep(test_us / 2);
> + val[1] = __pmu_read_single(fd, &ts[1]);
> + close(fd);
> +
> + igt_waitchildren();
> +
> + busy_r = (double)(val[1] - val[0]) / (ts[1] - ts[0]);
> +
> + igt_info("error=%.2f%% (%.2f%% vs %lu%%)\n",
> + __error(busy_r, target_busy_pct / 100.0),
> + busy_r * 100.0, target_busy_pct);
> +
> + assert_within_epsilon(busy_r, (double)target_busy_pct / 100.0, 0.15);
> +}
A fine compromise! :)
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH i-g-t v6] tests/perf_pmu: Verify engine busyness accuracy
2018-02-15 12:43 ` Chris Wilson
@ 2018-02-15 15:34 ` Tvrtko Ursulin
2018-02-17 11:36 ` [igt-dev] " Chris Wilson
0 siblings, 1 reply; 10+ messages in thread
From: Tvrtko Ursulin @ 2018-02-15 15:34 UTC (permalink / raw)
To: igt-dev; +Cc: Intel-gfx
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
A subtest to verify that the engine busyness is reported with expected
accuracy on platforms where the feature is available.
We test three patterns: 2%, 50% and 98% load per engine.
v2:
* Use spin batch instead of nop calibration.
* Various tweaks.
v3:
* Change loops to be time based.
* Use __igt_spin_batch_new inside timing sensitive loops.
* Fixed PWM sleep handling.
v4:
* Use restarting spin batch.
* Calibrate more carefully by looking at the real PWM loop.
v5:
* Made standalone.
* Better info messages.
* Tweak sleep compensation.
v6:
* Some final tweaks. (Chris Wilson)
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
tests/perf_pmu.c | 199 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 181 insertions(+), 18 deletions(-)
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 7d9c42d16a08..82053416cbea 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -35,6 +35,7 @@
#include <dirent.h>
#include <time.h>
#include <poll.h>
+#include <sched.h>
#include "igt.h"
#include "igt_core.h"
@@ -385,6 +386,22 @@ busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
gem_quiescent_gpu(gem_fd);
}
+static void
+__submit_spin_batch(int gem_fd, igt_spin_t *spin,
+ const struct intel_execution_engine2 *e)
+{
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = spin->handle
+ };
+ struct drm_i915_gem_execbuffer2 eb = {
+ .buffer_count = 1,
+ .buffers_ptr = to_user_pointer(&obj),
+ .flags = e2ring(gem_fd, e),
+ };
+
+ gem_execbuf(gem_fd, &eb);
+}
+
static void
most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
const unsigned int num_engines, unsigned int flags)
@@ -405,15 +422,7 @@ most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
if (e == e_) {
idle_idx = i;
} else if (spin) {
- struct drm_i915_gem_exec_object2 obj = {
- .handle = spin->handle
- };
- struct drm_i915_gem_execbuffer2 eb = {
- .buffer_count = 1,
- .buffers_ptr = to_user_pointer(&obj),
- .flags = e2ring(gem_fd, e_),
- };
- gem_execbuf(gem_fd, &eb);
+ __submit_spin_batch(gem_fd, spin, e_);
} else {
spin = igt_spin_batch_new(gem_fd, 0,
e2ring(gem_fd, e_), 0);
@@ -469,15 +478,7 @@ all_busy_check_all(int gem_fd, const unsigned int num_engines,
continue;
if (spin) {
- struct drm_i915_gem_exec_object2 obj = {
- .handle = spin->handle
- };
- struct drm_i915_gem_execbuffer2 eb = {
- .buffer_count = 1,
- .buffers_ptr = to_user_pointer(&obj),
- .flags = e2ring(gem_fd, e),
- };
- gem_execbuf(gem_fd, &eb);
+ __submit_spin_batch(gem_fd, spin, e);
} else {
spin = igt_spin_batch_new(gem_fd, 0,
e2ring(gem_fd, e), 0);
@@ -1392,6 +1393,157 @@ test_enable_race(int gem_fd, const struct intel_execution_engine2 *e)
gem_quiescent_gpu(gem_fd);
}
+static double __error(double val, double ref)
+{
+ igt_assert(ref > 1e-5 /* smallval */);
+ return (100.0 * val / ref) - 100.0;
+}
+
+static void __rearm_spin_batch(igt_spin_t *spin)
+{
+ const uint32_t mi_arb_chk = 0x5 << 23;
+
+ *spin->batch = mi_arb_chk;
+ __sync_synchronize();
+}
+
+#define div_round_up(a, b) (((a) + (b) - 1) / (b))
+
+static void
+accuracy(int gem_fd, const struct intel_execution_engine2 *e,
+ unsigned long target_busy_pct)
+{
+ const unsigned int min_test_loops = 7;
+ const unsigned long min_test_us = 1e6;
+ unsigned long busy_us = 2500;
+ unsigned long idle_us = 100 * (busy_us - target_busy_pct *
+ busy_us / 100) / target_busy_pct;
+ unsigned long pwm_calibration_us;
+ unsigned long test_us;
+ double busy_r;
+ uint64_t val[2];
+ uint64_t ts[2];
+ int fd;
+
+ /* Sampling platforms cannot reach the high accuracy criteria. */
+ igt_require(gem_has_execlists(gem_fd));
+
+ while (idle_us < 2500) {
+ busy_us *= 2;
+ idle_us *= 2;
+ }
+
+ pwm_calibration_us = min_test_loops * (busy_us + idle_us);
+ while (pwm_calibration_us < min_test_us)
+ pwm_calibration_us += busy_us + idle_us;
+ test_us = min_test_loops * (idle_us + busy_us);
+ while (test_us < min_test_us)
+ test_us += busy_us + idle_us;
+
+ igt_info("calibration=%luus, test=%luus; ratio=%.2f%% (%luus/%luus)\n",
+ pwm_calibration_us, test_us,
+ (double)busy_us / (busy_us + idle_us) * 100.0,
+ busy_us, idle_us);
+
+ assert_within_epsilon((double)busy_us / (busy_us + idle_us),
+ (double)target_busy_pct / 100.0, tolerance);
+
+ /* Emit PWM pattern on the engine from a child. */
+ igt_fork(child, 1) {
+ struct sched_param rt = { .sched_priority = 99 };
+ const unsigned long timeout[] = { pwm_calibration_us * 1000,
+ test_us * 2 * 1000 };
+ unsigned long sleep_busy = busy_us;
+ unsigned long sleep_idle = idle_us;
+ igt_spin_t *spin;
+ int ret;
+
+ /* We need the best sleep accuracy we can get. */
+ ret = sched_setscheduler(0,
+ SCHED_FIFO | SCHED_RESET_ON_FORK,
+ &rt);
+ if (ret)
+ igt_warn("Failed to set scheduling policy!\n");
+
+ /* Allocate our spin batch and idle it. */
+ spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+ igt_spin_batch_end(spin);
+ gem_sync(gem_fd, spin->handle);
+
+ /* 1st pass is calibration, second pass is the test. */
+ for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
+ unsigned long busy_ns = 0, idle_ns = 0;
+ struct timespec test_start = { };
+ unsigned long loops = 0;
+ double err_busy, err_idle;
+
+ igt_nsec_elapsed(&test_start);
+ do {
+ struct timespec t_busy = { };
+
+ igt_nsec_elapsed(&t_busy);
+
+ /* Restart the spinbatch. */
+ __rearm_spin_batch(spin);
+ __submit_spin_batch(gem_fd, spin, e);
+ measured_usleep(sleep_busy);
+ igt_spin_batch_end(spin);
+ gem_sync(gem_fd, spin->handle);
+
+ busy_ns += igt_nsec_elapsed(&t_busy);
+
+ idle_ns += measured_usleep(sleep_idle);
+
+ loops++;
+ } while (igt_nsec_elapsed(&test_start) < timeout[pass]);
+
+ busy_ns = div_round_up(busy_ns, loops);
+ idle_ns = div_round_up(idle_ns, loops);
+
+ err_busy = __error(busy_ns / 1000, busy_us);
+ err_idle = __error(idle_ns / 1000, idle_us);
+
+ igt_info("%u: busy %lu/%lu %.2f%%, idle %lu/%lu %.2f%%\n",
+ pass,
+ busy_ns / 1000, busy_us, err_busy,
+ idle_ns / 1000, idle_us, err_idle);
+
+ if (pass == 0) {
+ sleep_busy = (double)busy_us -
+ (double)busy_us * err_busy / 100.0;
+ sleep_idle = (double)idle_us -
+ (double)idle_us * err_idle / 100.0;
+ igt_info("calibrated sleeps ratio %.2f%% (%lu/%lu)\n",
+ (double)sleep_busy /
+ (sleep_busy + sleep_idle) * 100.0,
+ sleep_busy, sleep_idle);
+ }
+ }
+
+ igt_spin_batch_free(gem_fd, spin);
+ }
+
+ /* Let the child run. */
+ usleep(pwm_calibration_us * 2);
+
+ /* Collect engine busyness for an interesting part of child runtime. */
+ fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
+ val[0] = __pmu_read_single(fd, &ts[0]);
+ usleep(test_us / 2);
+ val[1] = __pmu_read_single(fd, &ts[1]);
+ close(fd);
+
+ igt_waitchildren();
+
+ busy_r = (double)(val[1] - val[0]) / (ts[1] - ts[0]);
+
+ igt_info("error=%.2f%% (%.2f%% vs %lu%%)\n",
+ __error(busy_r, target_busy_pct / 100.0),
+ busy_r * 100.0, target_busy_pct);
+
+ assert_within_epsilon(busy_r, (double)target_busy_pct / 100.0, 0.15);
+}
+
igt_main
{
const unsigned int num_other_metrics =
@@ -1420,6 +1572,8 @@ igt_main
invalid_init();
for_each_engine_class_instance(fd, e) {
+ const unsigned int pct[] = { 2, 50, 98 };
+
/**
* Test that a single engine metric can be initialized or it
* is correctly rejected.
@@ -1526,6 +1680,15 @@ igt_main
*/
igt_subtest_f("enable-race-%s", e->name)
test_enable_race(fd, e);
+
+ /**
+ * Check engine busyness accuracy is as expected.
+ */
+ for (i = 0; i < ARRAY_SIZE(pct); i++) {
+ igt_subtest_f("busy-accuracy-%u-%s",
+ pct[i], e->name)
+ accuracy(fd, e, pct[i]);
+ }
}
/**
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [igt-dev] [PATCH i-g-t v6] tests/perf_pmu: Verify engine busyness accuracy
2018-02-15 15:34 ` [PATCH i-g-t v6] " Tvrtko Ursulin
@ 2018-02-17 11:36 ` Chris Wilson
2018-02-19 9:19 ` Tvrtko Ursulin
0 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2018-02-17 11:36 UTC (permalink / raw)
To: Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx
Quoting Tvrtko Ursulin (2018-02-15 15:34:53)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> A subtest to verify that the engine busyness is reported with expected
> accuracy on platforms where the feature is available.
>
> We test three patterns: 2%, 50% and 98% load per engine.
>
> v2:
> * Use spin batch instead of nop calibration.
> * Various tweaks.
>
> v3:
> * Change loops to be time based.
> * Use __igt_spin_batch_new inside timing sensitive loops.
> * Fixed PWM sleep handling.
>
> v4:
> * Use restarting spin batch.
> * Calibrate more carefully by looking at the real PWM loop.
>
> v5:
> * Made standalone.
> * Better info messages.
> * Tweak sleep compensation.
>
> v6:
> * Some final tweaks. (Chris Wilson)
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> +
> + /* Sampling platforms cannot reach the high accuracy criteria. */
> + igt_require(gem_has_execlists(gem_fd));
But we don't handle guc, right?
igt_skip_on(gem_has_guc_submission(gem_fd)) ?
https://intel-gfx-ci.01.org/tree/drm-tip/kasan_2/fi-skl-guc/igt@perf_pmu@busy-accuracy-2-vecs0.html
Or at least it doesn't work to sufficient accuracy. And bsw hung.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [igt-dev] [PATCH i-g-t v6] tests/perf_pmu: Verify engine busyness accuracy
2018-02-17 11:36 ` [igt-dev] " Chris Wilson
@ 2018-02-19 9:19 ` Tvrtko Ursulin
2018-02-19 9:27 ` Chris Wilson
0 siblings, 1 reply; 10+ messages in thread
From: Tvrtko Ursulin @ 2018-02-19 9:19 UTC (permalink / raw)
To: Chris Wilson, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx
On 17/02/2018 11:36, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-02-15 15:34:53)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> A subtest to verify that the engine busyness is reported with expected
>> accuracy on platforms where the feature is available.
>>
>> We test three patterns: 2%, 50% and 98% load per engine.
>>
>> v2:
>> * Use spin batch instead of nop calibration.
>> * Various tweaks.
>>
>> v3:
>> * Change loops to be time based.
>> * Use __igt_spin_batch_new inside timing sensitive loops.
>> * Fixed PWM sleep handling.
>>
>> v4:
>> * Use restarting spin batch.
>> * Calibrate more carefully by looking at the real PWM loop.
>>
>> v5:
>> * Made standalone.
>> * Better info messages.
>> * Tweak sleep compensation.
>>
>> v6:
>> * Some final tweaks. (Chris Wilson)
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
>> ---
>> +
>> + /* Sampling platforms cannot reach the high accuracy criteria. */
>> + igt_require(gem_has_execlists(gem_fd));
>
> But we don't handle guc, right?
Correct.
> igt_skip_on(gem_has_guc_submission(gem_fd)) ?
I'll dig up and rebase my old patch which implements busy stats in GuC
mode.
> https://intel-gfx-ci.01.org/tree/drm-tip/kasan_2/fi-skl-guc/igt@perf_pmu@busy-accuracy-2-vecs0.html
>
> Or at least it doesn't work to sufficient accuracy. And bsw hung.
There are some occasional excursions over 15% tolerance even with
execlists on small core. Bummer. Don't want to be playing up the
tolerance game. I'll analyse in more detail and think what to do.
Do you have a link to BSW hang? Is that obviously related to PMU?
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [igt-dev] [PATCH i-g-t v6] tests/perf_pmu: Verify engine busyness accuracy
2018-02-19 9:19 ` Tvrtko Ursulin
@ 2018-02-19 9:27 ` Chris Wilson
2018-02-19 9:57 ` Tvrtko Ursulin
0 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2018-02-19 9:27 UTC (permalink / raw)
To: Tvrtko Ursulin, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx
Quoting Tvrtko Ursulin (2018-02-19 09:19:47)
>
> Do you have a link to BSW hang? Is that obviously related to PMU?
It's only occurring in this test, just looks like an issue with the
spinner:
[bsw] https://intel-gfx-ci.01.org/tree/drm-tip/kasan_2/fi-bsw-n3050/igt@perf_pmu@busy-accuracy-2-bcs0.html
[kbl] https://intel-gfx-ci.01.org/tree/drm-tip/kasan_2/fi-kbl-7560u/igt@perf_pmu@busy-accuracy-2-bcs0.html
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [igt-dev] [PATCH i-g-t v6] tests/perf_pmu: Verify engine busyness accuracy
2018-02-19 9:27 ` Chris Wilson
@ 2018-02-19 9:57 ` Tvrtko Ursulin
2018-02-19 10:26 ` Chris Wilson
0 siblings, 1 reply; 10+ messages in thread
From: Tvrtko Ursulin @ 2018-02-19 9:57 UTC (permalink / raw)
To: Chris Wilson, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx
On 19/02/2018 09:27, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-02-19 09:19:47)
>>
>> Do you have a link to BSW hang? Is that obviously related to PMU?
>
> It's only occurring in this test, just looks like an issue with the
> spinner:
>
> [bsw] https://intel-gfx-ci.01.org/tree/drm-tip/kasan_2/fi-bsw-n3050/igt@perf_pmu@busy-accuracy-2-bcs0.html
...
<0>[ 681.022677] perf_pmu-1516 1..s1 282520414us : execlists_submission_tasklet: bcs0 in[0]: ctx=3.1, seqno=a
<0>[ 681.022838] perf_pmu-1516 1..s1 282520580us : execlists_submission_tasklet: bcs0 cs-irq head=5 [5?], tail=0 [0?]
<0>[ 681.023001] perf_pmu-1516 1..s1 282520594us : execlists_submission_tasklet: bcs0 csb[0]: status=0x00000001:0x00000000, active=0x1
<0>[ 681.023168] kworker/-338 1.... 298087910us : reset_common_ring: bcs0 seqno=a
<0>[ 681.023321] ksoftirq-17 1..s. 298088483us : execlists_submission_tasklet: bcs0 in[0]: ctx=3.1, seqno=a
<0>[ 681.023482] ksoftirq-17 1..s. 298088575us : execlists_submission_tasklet: bcs0 cs-irq head=0 [0], tail=1 [1]
<0>[ 681.023644] ksoftirq-17 1..s. 298088579us : execlists_submission_tasklet: bcs0 csb[1]: status=0x00000018:0x00000003, active=0x1
<0>[ 681.023811] ksoftirq-17 1..s. 298088581us : execlists_submission_tasklet: bcs0 out[0]: ctx=3.1, seqno=a
Everything stops.
> [kbl] https://intel-gfx-ci.01.org/tree/drm-tip/kasan_2/fi-kbl-7560u/igt@perf_pmu@busy-accuracy-2-bcs0.html
...
<0>[ 506.745332] perf_pmu-1544 3..s1 107905835us : execlists_submission_tasklet: bcs0 in[0]: ctx=3.1, seqno=a
<0>[ 506.745397] <idle>-0 2..s1 107905980us : execlists_submission_tasklet: bcs0 cs-irq head=2 [1?], tail=3 [3?]
<0>[ 506.745440] <idle>-0 2..s1 107905983us : execlists_submission_tasklet: bcs0 csb[3]: status=0x00000001:0x00000000, active=0x1
<0>[ 506.745498] kworker/-30 3.... 120840583us : reset_common_ring: bcs0 seqno=a
<0>[ 506.745547] ksoftirq-29 3..s. 120840688us : execlists_submission_tasklet: bcs0 in[0]: ctx=3.1, seqno=a
<0>[ 506.745598] in:imklo-499 2..s1 120840710us : execlists_submission_tasklet: bcs0 cs-irq head=0 [0], tail=1 [1]
<0>[ 506.745637] in:imklo-499 2..s1 120840712us : execlists_submission_tasklet: bcs0 csb[1]: status=0x00000018:0x00000003, active=0x1
<0>[ 506.745676] in:imklo-499 2..s1 120840713us : execlists_submission_tasklet: bcs0 out[0]: ctx=3.1, seqno=a
Everything stops here.
I have no idea what's happening here. In both cases I would expect the test
to have exited after the GPU hang (or at least attempt to exit!), since it
would detect it overran the timeout.
Could it be stuck in gem_sync after the reset? Or somewhere else?
Could we add "echo t > /proc/sysrq-trigger" equivalent when owatch triggers?
Or it would overflow some buffer? Should work in cases like this one, when
it is not a machine hang.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [igt-dev] [PATCH i-g-t v6] tests/perf_pmu: Verify engine busyness accuracy
2018-02-19 9:57 ` Tvrtko Ursulin
@ 2018-02-19 10:26 ` Chris Wilson
2018-02-19 10:58 ` Tvrtko Ursulin
0 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2018-02-19 10:26 UTC (permalink / raw)
To: Tvrtko Ursulin, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx
Quoting Tvrtko Ursulin (2018-02-19 09:57:20)
>
> On 19/02/2018 09:27, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-02-19 09:19:47)
> >>
> >> Do you have a link to BSW hang? Is that obviously related to PMU?
> >
> > It's only occurring in this test, just looks like an issue with the
> > spinner:
> >
> > [bsw] https://intel-gfx-ci.01.org/tree/drm-tip/kasan_2/fi-bsw-n3050/igt@perf_pmu@busy-accuracy-2-bcs0.html
>
> ...
> <0>[ 681.022677] perf_pmu-1516 1..s1 282520414us : execlists_submission_tasklet: bcs0 in[0]: ctx=3.1, seqno=a
> <0>[ 681.022838] perf_pmu-1516 1..s1 282520580us : execlists_submission_tasklet: bcs0 cs-irq head=5 [5?], tail=0 [0?]
> <0>[ 681.023001] perf_pmu-1516 1..s1 282520594us : execlists_submission_tasklet: bcs0 csb[0]: status=0x00000001:0x00000000, active=0x1
> <0>[ 681.023168] kworker/-338 1.... 298087910us : reset_common_ring: bcs0 seqno=a
> <0>[ 681.023321] ksoftirq-17 1..s. 298088483us : execlists_submission_tasklet: bcs0 in[0]: ctx=3.1, seqno=a
> <0>[ 681.023482] ksoftirq-17 1..s. 298088575us : execlists_submission_tasklet: bcs0 cs-irq head=0 [0], tail=1 [1]
> <0>[ 681.023644] ksoftirq-17 1..s. 298088579us : execlists_submission_tasklet: bcs0 csb[1]: status=0x00000018:0x00000003, active=0x1
> <0>[ 681.023811] ksoftirq-17 1..s. 298088581us : execlists_submission_tasklet: bcs0 out[0]: ctx=3.1, seqno=a
>
> Everything stops.
>
> > [kbl] https://intel-gfx-ci.01.org/tree/drm-tip/kasan_2/fi-kbl-7560u/igt@perf_pmu@busy-accuracy-2-bcs0.html
>
> ...
> <0>[ 506.745332] perf_pmu-1544 3..s1 107905835us : execlists_submission_tasklet: bcs0 in[0]: ctx=3.1, seqno=a
> <0>[ 506.745397] <idle>-0 2..s1 107905980us : execlists_submission_tasklet: bcs0 cs-irq head=2 [1?], tail=3 [3?]
> <0>[ 506.745440] <idle>-0 2..s1 107905983us : execlists_submission_tasklet: bcs0 csb[3]: status=0x00000001:0x00000000, active=0x1
> <0>[ 506.745498] kworker/-30 3.... 120840583us : reset_common_ring: bcs0 seqno=a
> <0>[ 506.745547] ksoftirq-29 3..s. 120840688us : execlists_submission_tasklet: bcs0 in[0]: ctx=3.1, seqno=a
> <0>[ 506.745598] in:imklo-499 2..s1 120840710us : execlists_submission_tasklet: bcs0 cs-irq head=0 [0], tail=1 [1]
> <0>[ 506.745637] in:imklo-499 2..s1 120840712us : execlists_submission_tasklet: bcs0 csb[1]: status=0x00000018:0x00000003, active=0x1
> <0>[ 506.745676] in:imklo-499 2..s1 120840713us : execlists_submission_tasklet: bcs0 out[0]: ctx=3.1, seqno=a
>
> Everything stops here.
>
> I have not idea what's happening here. In both cases I would expect the test
> to have exited after the GPU hang (or at least attempt to exit!), since it
> would detect it overran the timeout.
>
> Could it be stuck in gem_sync after the reset? Or somewhere else?
I think it's that we will be throwing the calibration off if it hangs.
If busy_ns = 10s, won't that generate a target idle time of 500s?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [igt-dev] [PATCH i-g-t v6] tests/perf_pmu: Verify engine busyness accuracy
2018-02-19 10:26 ` Chris Wilson
@ 2018-02-19 10:58 ` Tvrtko Ursulin
2018-02-19 11:04 ` Chris Wilson
0 siblings, 1 reply; 10+ messages in thread
From: Tvrtko Ursulin @ 2018-02-19 10:58 UTC (permalink / raw)
To: Chris Wilson, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx
On 19/02/2018 10:26, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-02-19 09:57:20)
>>
>> On 19/02/2018 09:27, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2018-02-19 09:19:47)
>>>>
>>>> Do you have a link to BSW hang? Is that obviously related to PMU?
>>>
>>> It's only occurring in this test, just looks like an issue with the
>>> spinner:
>>>
>>> [bsw] https://intel-gfx-ci.01.org/tree/drm-tip/kasan_2/fi-bsw-n3050/igt@perf_pmu@busy-accuracy-2-bcs0.html
>>
>> ...
>> <0>[ 681.022677] perf_pmu-1516 1..s1 282520414us : execlists_submission_tasklet: bcs0 in[0]: ctx=3.1, seqno=a
>> <0>[ 681.022838] perf_pmu-1516 1..s1 282520580us : execlists_submission_tasklet: bcs0 cs-irq head=5 [5?], tail=0 [0?]
>> <0>[ 681.023001] perf_pmu-1516 1..s1 282520594us : execlists_submission_tasklet: bcs0 csb[0]: status=0x00000001:0x00000000, active=0x1
>> <0>[ 681.023168] kworker/-338 1.... 298087910us : reset_common_ring: bcs0 seqno=a
>> <0>[ 681.023321] ksoftirq-17 1..s. 298088483us : execlists_submission_tasklet: bcs0 in[0]: ctx=3.1, seqno=a
>> <0>[ 681.023482] ksoftirq-17 1..s. 298088575us : execlists_submission_tasklet: bcs0 cs-irq head=0 [0], tail=1 [1]
>> <0>[ 681.023644] ksoftirq-17 1..s. 298088579us : execlists_submission_tasklet: bcs0 csb[1]: status=0x00000018:0x00000003, active=0x1
>> <0>[ 681.023811] ksoftirq-17 1..s. 298088581us : execlists_submission_tasklet: bcs0 out[0]: ctx=3.1, seqno=a
>>
>> Everything stops.
>>
>>> [kbl] https://intel-gfx-ci.01.org/tree/drm-tip/kasan_2/fi-kbl-7560u/igt@perf_pmu@busy-accuracy-2-bcs0.html
>>
>> ...
>> <0>[ 506.745332] perf_pmu-1544 3..s1 107905835us : execlists_submission_tasklet: bcs0 in[0]: ctx=3.1, seqno=a
>> <0>[ 506.745397] <idle>-0 2..s1 107905980us : execlists_submission_tasklet: bcs0 cs-irq head=2 [1?], tail=3 [3?]
>> <0>[ 506.745440] <idle>-0 2..s1 107905983us : execlists_submission_tasklet: bcs0 csb[3]: status=0x00000001:0x00000000, active=0x1
>> <0>[ 506.745498] kworker/-30 3.... 120840583us : reset_common_ring: bcs0 seqno=a
>> <0>[ 506.745547] ksoftirq-29 3..s. 120840688us : execlists_submission_tasklet: bcs0 in[0]: ctx=3.1, seqno=a
>> <0>[ 506.745598] in:imklo-499 2..s1 120840710us : execlists_submission_tasklet: bcs0 cs-irq head=0 [0], tail=1 [1]
>> <0>[ 506.745637] in:imklo-499 2..s1 120840712us : execlists_submission_tasklet: bcs0 csb[1]: status=0x00000018:0x00000003, active=0x1
>> <0>[ 506.745676] in:imklo-499 2..s1 120840713us : execlists_submission_tasklet: bcs0 out[0]: ctx=3.1, seqno=a
>>
>> Everything stops here.
>>
>> I have not idea what's happening here. In both cases I would expect the test
>> to have exited after the GPU hang (or at least attempt to exit!), since it
>> would detect it overran the timeout.
>>
>> Could it be stuck in gem_sync after the reset? Or somewhere else?
>
> I think it's that we will be throwing the calibration off if it hangs.
> If busy_ns = 10s, won't that generate a target idle time of 500s?
Indeed, well spotted. I'll need to add a hang detector of some sort.
In the meantime trying to figure out how to wire up GuC to engine stats.
The fix to get the correct state on stats enable by looking at ports is a
problem given the different tracking I had in GuC mode.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [igt-dev] [PATCH i-g-t v6] tests/perf_pmu: Verify engine busyness accuracy
2018-02-19 10:58 ` Tvrtko Ursulin
@ 2018-02-19 11:04 ` Chris Wilson
0 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2018-02-19 11:04 UTC (permalink / raw)
To: Tvrtko Ursulin, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx
Quoting Tvrtko Ursulin (2018-02-19 10:58:25)
>
> On 19/02/2018 10:26, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-02-19 09:57:20)
> >>
> >> On 19/02/2018 09:27, Chris Wilson wrote:
> >>> Quoting Tvrtko Ursulin (2018-02-19 09:19:47)
> >>>>
> >>>> Do you have a link to BSW hang? Is that obviously related to PMU?
> >>>
> >>> It's only occurring in this test, just looks like an issue with the
> >>> spinner:
> >>>
> >>> [bsw] https://intel-gfx-ci.01.org/tree/drm-tip/kasan_2/fi-bsw-n3050/igt@perf_pmu@busy-accuracy-2-bcs0.html
> >>
> >> ...
> >> <0>[ 681.022677] perf_pmu-1516 1..s1 282520414us : execlists_submission_tasklet: bcs0 in[0]: ctx=3.1, seqno=a
> >> <0>[ 681.022838] perf_pmu-1516 1..s1 282520580us : execlists_submission_tasklet: bcs0 cs-irq head=5 [5?], tail=0 [0?]
> >> <0>[ 681.023001] perf_pmu-1516 1..s1 282520594us : execlists_submission_tasklet: bcs0 csb[0]: status=0x00000001:0x00000000, active=0x1
> >> <0>[ 681.023168] kworker/-338 1.... 298087910us : reset_common_ring: bcs0 seqno=a
> >> <0>[ 681.023321] ksoftirq-17 1..s. 298088483us : execlists_submission_tasklet: bcs0 in[0]: ctx=3.1, seqno=a
> >> <0>[ 681.023482] ksoftirq-17 1..s. 298088575us : execlists_submission_tasklet: bcs0 cs-irq head=0 [0], tail=1 [1]
> >> <0>[ 681.023644] ksoftirq-17 1..s. 298088579us : execlists_submission_tasklet: bcs0 csb[1]: status=0x00000018:0x00000003, active=0x1
> >> <0>[ 681.023811] ksoftirq-17 1..s. 298088581us : execlists_submission_tasklet: bcs0 out[0]: ctx=3.1, seqno=a
> >>
> >> Everything stops.
> >>
> >>> [kbl] https://intel-gfx-ci.01.org/tree/drm-tip/kasan_2/fi-kbl-7560u/igt@perf_pmu@busy-accuracy-2-bcs0.html
> >>
> >> ...
> >> <0>[ 506.745332] perf_pmu-1544 3..s1 107905835us : execlists_submission_tasklet: bcs0 in[0]: ctx=3.1, seqno=a
> >> <0>[ 506.745397] <idle>-0 2..s1 107905980us : execlists_submission_tasklet: bcs0 cs-irq head=2 [1?], tail=3 [3?]
> >> <0>[ 506.745440] <idle>-0 2..s1 107905983us : execlists_submission_tasklet: bcs0 csb[3]: status=0x00000001:0x00000000, active=0x1
> >> <0>[ 506.745498] kworker/-30 3.... 120840583us : reset_common_ring: bcs0 seqno=a
> >> <0>[ 506.745547] ksoftirq-29 3..s. 120840688us : execlists_submission_tasklet: bcs0 in[0]: ctx=3.1, seqno=a
> >> <0>[ 506.745598] in:imklo-499 2..s1 120840710us : execlists_submission_tasklet: bcs0 cs-irq head=0 [0], tail=1 [1]
> >> <0>[ 506.745637] in:imklo-499 2..s1 120840712us : execlists_submission_tasklet: bcs0 csb[1]: status=0x00000018:0x00000003, active=0x1
> >> <0>[ 506.745676] in:imklo-499 2..s1 120840713us : execlists_submission_tasklet: bcs0 out[0]: ctx=3.1, seqno=a
> >>
> >> Everything stops here.
> >>
> >> I have not idea what's happening here. In both cases I would expect the test
> >> to have exited after the GPU hang (or at least attempt to exit!), since it
> >> would detect it overran the timeout.
> >>
> >> Could it be stuck in gem_sync after the reset? Or somewhere else?
> >
> > I think it's that we will be throwing the calibration off if it hangs.
> > If busy_ns = 10s, won't that generate a target idle time of 500s?
>
> Indeed, well spotted. I'll need to add a hang detector of some sort.
Oh, I think I know why it's hanging. As the buffer will be idle, the
kernel is allowed to move it, and __submit_spin_batch() doesn't tell the
kernel to preserve the original address (so the kernel assumes that the
relocations are relative to the passed-in address and so moves the buffer
to match). I should have noticed that before given the discussion around
EXEC_OBJECT_PINNED for the spinner.
I think there's an easy enough patch...
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2018-02-19 11:04 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-15 11:53 [PATCH i-g-t] tests/perf_pmu: Verify engine busyness accuracy Tvrtko Ursulin
2018-02-15 12:43 ` Chris Wilson
2018-02-15 15:34 ` [PATCH i-g-t v6] " Tvrtko Ursulin
2018-02-17 11:36 ` [igt-dev] " Chris Wilson
2018-02-19 9:19 ` Tvrtko Ursulin
2018-02-19 9:27 ` Chris Wilson
2018-02-19 9:57 ` Tvrtko Ursulin
2018-02-19 10:26 ` Chris Wilson
2018-02-19 10:58 ` Tvrtko Ursulin
2018-02-19 11:04 ` Chris Wilson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox