* [igt-dev] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-06-02 0:26 Chris Wilson
2020-06-02 0:58 ` [igt-dev] ✓ Fi.CI.BAT: success for i915/gem_exec_schedule: Try to spot unfairness (rev4) Patchwork
2020-06-02 9:18 ` [Intel-gfx] [igt-dev] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness Mika Kuoppala
0 siblings, 2 replies; 4+ messages in thread
From: Chris Wilson @ 2020-06-02 0:26 UTC (permalink / raw)
To: intel-gfx; +Cc: igt-dev, Tvrtko Ursulin, Chris Wilson
An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), has each client do an
equal amount of work, and asserts that they all take an equal amount of
time.
Though we have never claimed to have a completely fair scheduler, that
is what is expected.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
tests/i915/gem_exec_schedule.c | 418 +++++++++++++++++++++++++++++++++
1 file changed, 418 insertions(+)
diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 56c638833..d1121ecd2 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -2495,6 +2495,417 @@ static void measure_semaphore_power(int i915)
rapl_close(&pkg);
}
+/*
+ * Ask the driver for I915_PARAM_CS_TIMESTAMP_FREQUENCY via
+ * DRM_IOCTL_I915_GETPARAM and return the value it reports.
+ *
+ * The ioctl's return code is not inspected; the local starts at 0, so
+ * that is presumably what the caller sees if the query fails.
+ */
+static int read_timestamp_frequency(int i915)
+{
+	int freq = 0;
+	drm_i915_getparam_t query = {
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+		.value = &freq,
+	};
+
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &query);
+
+	return freq;
+}
+
+/*
+ * Divide @x by @y and round the quotient up to the next integer.
+ *
+ * Written as "quotient plus one if there is a remainder" instead of
+ * (x + y - 1) / y so that the intermediate sum cannot wrap around when
+ * @x is close to UINT64_MAX. @y must be non-zero.
+ */
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return x / y + (x % y != 0);
+}
+
+/*
+ * Convert a duration in nanoseconds into CS timestamp ticks, rounding
+ * up, using the frequency returned by read_timestamp_frequency().
+ */
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+	const uint64_t scaled = ns * read_timestamp_frequency(i915);
+
+	return div64_u64_round_up(scaled, NSEC_PER_SEC);
+}
+
+/*
+ * Convert a count of CS timestamp ticks into nanoseconds, rounding up,
+ * using the frequency returned by read_timestamp_frequency().
+ */
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{
+	const uint64_t scaled = ticks * NSEC_PER_SEC;
+
+	return div64_u64_round_up(scaled, read_timestamp_frequency(i915));
+}
+
+/* Build a command dword: opcode in bits 23 and up, flags in the low bits. */
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+/* MI_MATH header; @x is encoded as (x) - 1 in the flags field. */
+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
+/* One MI_MATH dword: opcode at bit 20, first operand at bit 10, second below. */
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
+#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
+/*
+ * LOAD0/LOAD1 take a single operand; pass an explicit zero for the
+ * second one since MI_MATH_INSTR() requires three arguments.
+ */
+#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1, 0x0)
+#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1, 0x0)
+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define MI_MATH_REG(x) (x)
+#define MI_MATH_REG_SRCA 0x20
+#define MI_MATH_REG_SRCB 0x21
+#define MI_MATH_REG_ACCU 0x31
+#define MI_MATH_REG_ZF 0x32
+#define MI_MATH_REG_CF 0x33
+
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
+
+/*
+ * Write a looping command batch into the first dwords of @handle.
+ *
+ * The batch latches the engine TIMESTAMP register into GPR START_TS
+ * once, then on every pass reloads TIMESTAMP into GPR NOW_TS, lets
+ * MI_MATH compute NOW_TS - START_TS and store the inverted result back
+ * into NOW_TS, writes that GPR to @addr + 4000 and issues
+ * MI_COND_BATCH_BUFFER_END against ~ns_to_ticks(@ns) at that location
+ * before branching back to the loop head.
+ *
+ * NOTE(review): the net effect is presumably a GPU-side busy-wait of
+ * roughly @ns; confirm against the MI_COND_BATCH_BUFFER_END compare
+ * semantics.
+ *
+ * @e: engine whose mmio base is used to address the GPRs and TIMESTAMP
+ * @handle: object that receives the batch (mapped here for CPU writes)
+ * @addr: GPU address used for the absolute addresses emitted below;
+ * presumably the offset @handle is bound at -- confirm with caller
+ * @ns: requested delay, converted to ticks with ns_to_ticks()
+ *
+ * Skips the test (igt_require) if no mmio base is known for @e.
+ */
+static void delay(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr,
+ uint64_t ns)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define TIMESTAMP (base + 0x3a8)
+ enum { START_TS, NOW_TS };
+ uint32_t *map, *cs, *jmp;
+
+ igt_require(base);
+
+ cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+ /* START_TS: zero the upper dword, copy TIMESTAMP into the lower one */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(START_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = TIMESTAMP;
+ *cs++ = CS_GPR(START_TS);
+
+ /* Pad with a zero dword so the loop head sits on an 8-byte boundary */
+ if (offset_in_page(cs) & 4)
+ *cs++ = 0;
+ jmp = cs;
+
+ *cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+ /* NOW_TS: zero the upper dword, copy TIMESTAMP into the lower one */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(NOW_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = TIMESTAMP;
+ *cs++ = CS_GPR(NOW_TS);
+
+ /* NOW_TS = inverted (NOW_TS - START_TS) */
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+ *cs++ = MI_MATH_SUB;
+ *cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+ /* Write GPR NOW_TS to byte 4000 of the object for the compare below */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(NOW_TS);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /* Conditional end: compare value is the inverted target tick count */
+ *cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+ *cs++ = ~ns_to_ticks(i915, ns);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /* Otherwise branch back to the loop head recorded in jmp */
+ *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+ *cs++ = addr + offset_in_page(jmp);
+ *cs++ = addr >> 32;
+
+ munmap(map, 4096);
+}
+
+/*
+ * Build a delay batch for @ctx on engine @e.
+ *
+ * A fresh batch object is submitted once and waited upon; the offset
+ * found in the exec object afterwards is handed to delay() as the
+ * address to build the loop against. The returned object has
+ * EXEC_OBJECT_PINNED set in addition to the 48b-address flag.
+ */
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 batch = {
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 eb = {
+		.buffer_count = 1,
+		.flags = e->flags,
+		.rsvd1 = ctx,
+	};
+
+	batch.handle = batch_create(i915);
+	eb.buffers_ptr = to_user_pointer(&batch);
+
+	/* Initial submission, completed before the batch is rewritten */
+	gem_execbuf(i915, &eb);
+	gem_sync(i915, batch.handle);
+
+	delay(i915, e, batch.handle, batch.offset, target_ns);
+
+	batch.flags |= EXEC_OBJECT_PINNED;
+	return batch;
+}
+
+/*
+ * Write a self-advancing timestamp logger into @handle, starting at
+ * dword 512 (byte offset 2048) of the object.
+ *
+ * Each execution of the batch stores CS_TIMESTAMP to the address held
+ * in its first SRM, then computes (address + 4) & MASK in GPR ADDR and
+ * writes that value back over the address dwords recorded in
+ * timestamp_lo and addr_lo, so the next execution logs to the next
+ * dword.
+ *
+ * MASK has bit 11 (0x800) cleared; assuming @addr is at least 4KiB
+ * aligned, the write pointer therefore wraps within the first 2048
+ * bytes of the object -- presumably to keep the log clear of the batch
+ * itself; confirm.
+ *
+ * @e: engine whose mmio base is used to address the GPRs and CS_TIMESTAMP
+ * @handle: object that receives both the batch and the logged values
+ * @addr: GPU address used for the absolute addresses emitted below;
+ * presumably the offset @handle is bound at -- confirm with caller
+ *
+ * Skips the test (igt_require) if no mmio base is known for @e.
+ */
+static void tslog(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define CS_TIMESTAMP (base + 0x358)
+ enum { ONE, MASK, ADDR };
+ uint32_t *timestamp_lo, *addr_lo;
+ uint32_t *map, *cs;
+
+ igt_require(base);
+
+ map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+ cs = map + 512;
+
+ /* Log CS_TIMESTAMP; the address dword is remembered for patching */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_TIMESTAMP;
+ timestamp_lo = cs;
+ *cs++ = addr;
+ *cs++ = addr >> 32;
+
+ /* GPR ADDR = current log address; low dword is also patched later */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR);
+ addr_lo = cs;
+ *cs++ = addr;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR) + 4;
+ *cs++ = addr >> 32;
+
+ /* GPR ONE = 4: the increment, one dword in bytes */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ONE);
+ *cs++ = 4;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ONE) + 4;
+ *cs++ = 0;
+
+ /* GPR MASK = all ones except bit 11 */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK);
+ *cs++ = 0xfffff7ff;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK) + 4;
+ *cs++ = 0xffffffff;
+
+ /* ADDR = (ADDR + ONE) & MASK */
+ *cs++ = MI_MATH(8);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ONE));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+ *cs++ = MI_MATH_AND;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+ /* Patch the new address into both address dwords recorded above */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(timestamp_lo);
+ *cs++ = addr >> 32;
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(addr_lo);
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ munmap(map, 4096);
+}
+
+/*
+ * Build a timestamp-logging batch for @ctx on engine @e.
+ *
+ * A fresh batch object is submitted once and waited upon; the offset
+ * found in the exec object afterwards is handed to tslog() as the
+ * address to build the logger against. The returned object has
+ * EXEC_OBJECT_PINNED set in addition to the 48b-address flag.
+ */
+static struct drm_i915_gem_exec_object2
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+	struct drm_i915_gem_exec_object2 log = {
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 eb = {
+		.buffer_count = 1,
+		.flags = e->flags,
+		.rsvd1 = ctx,
+	};
+
+	log.handle = batch_create(i915);
+	eb.buffers_ptr = to_user_pointer(&log);
+
+	/* Initial submission, completed before the object is rewritten */
+	gem_execbuf(i915, &eb);
+	gem_sync(i915, log.handle);
+
+	tslog(i915, e, log.handle, log.offset);
+
+	log.flags |= EXEC_OBJECT_PINNED;
+	return log;
+}
+
+/*
+ * qsort() comparator ordering uint32_t elements ascending.
+ *
+ * The elements are read as uint32_t to match the array this is used on
+ * (uint32_t entries, element size sizeof(uint32_t)). Reading them
+ * through a wider type would compare two neighbouring entries at once
+ * and read past the end of the array.
+ *
+ * Returns -1, 0 or 1.
+ */
+static int cmp_u32(const void *A, const void *B)
+{
+	const uint32_t *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+/*
+ * Body of one forked client.
+ *
+ * Until *@ctl becomes non-zero, each loop iteration ("frame") submits
+ * the delay batch batches_per_frame times and then the timestamp logger
+ * (at batch offset 2048). Afterwards the logged values are turned into
+ * successive differences, sorted, and the middle entry is converted
+ * with ticks_to_ns() and written to *@out.
+ *
+ * @frame_ns: per-frame delay budget, split evenly over the delay batches
+ * @timeline: sw_sync timeline used for the input fence with F_EXTERNAL
+ * @flags: F_PACING waits on the previous frame's out-fence before
+ * continuing; F_EXTERNAL gates each frame on a sw_sync fence
+ * @ctl: stop flag polled with READ_ONCE()
+ * @out: receives the result in nanoseconds
+ *
+ * NOTE(review): @timeout is not referenced in the body, and tv is only
+ * passed to igt_nsec_elapsed() whose return value is discarded.
+ */
+static void fair_child(int i915, uint32_t ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t frame_ns,
+ int timeout,
+ int timeline,
+ unsigned int flags,
+ unsigned long *ctl,
+ unsigned long *out)
+#define F_PACING 0x1
+#define F_EXTERNAL 0x2
+{
+ const int batches_per_frame = 3;
+ struct drm_i915_gem_exec_object2 prev =
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame);
+ struct drm_i915_gem_exec_object2 next =
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame);
+ struct drm_i915_gem_exec_object2 ts = tslog_create(i915, ctx, e);
+ struct timespec tv = {};
+ unsigned long count = 0;
+ int p_fence = -1, n_fence = -1;
+ uint32_t *map;
+ int n;
+
+ igt_nsec_elapsed(&tv);
+ while (!READ_ONCE(*ctl)) {
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&next),
+ .buffer_count = 1,
+ .rsvd1 = ctx,
+ .rsvd2 = -1,
+ .flags = e->flags,
+ };
+
+ /* Gate this frame on sw_sync point 'count' of the shared timeline */
+ if (flags & F_EXTERNAL) {
+ execbuf.rsvd2 =
+ sw_sync_timeline_create_fence(timeline, count);
+ execbuf.flags |= I915_EXEC_FENCE_IN;
+ }
+
+ /* First batch of the frame: request an out-fence (upper 32 bits of rsvd2) */
+ execbuf.flags |= I915_EXEC_FENCE_OUT;
+ gem_execbuf_wr(i915, &execbuf);
+ n_fence = execbuf.rsvd2 >> 32;
+ execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+ /* Remaining delay batches of the frame, without fences */
+ for (n = 1; n < batches_per_frame; n++)
+ gem_execbuf(i915, &execbuf);
+
+ /* Log a timestamp: the logger batch starts at byte 2048 of ts */
+ execbuf.buffers_ptr = to_user_pointer(&ts);
+ execbuf.batch_start_offset = 2048;
+ gem_execbuf(i915, &execbuf);
+
+ /* Pacing: block until the previous frame's out-fence signals */
+ if (flags & F_PACING && p_fence != -1) {
+ struct pollfd pfd = {
+ .fd = p_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+ close(p_fence);
+ /*
+ * NOTE(review): rsvd2 is 64 bits wide and is narrowed to int
+ * here; presumably this closes the in-fence held in the low
+ * 32 bits (or -1 when there is none) -- confirm.
+ */
+ close(execbuf.rsvd2);
+
+ /* This frame's batch and out-fence become "previous" for the next one */
+ igt_swap(prev, next);
+ igt_swap(p_fence, n_fence);
+ count++;
+ }
+ gem_sync(i915, prev.handle);
+ close(p_fence);
+
+ gem_close(i915, next.handle);
+ gem_close(i915, prev.handle);
+
+ /*
+ * Replace up to the first 512 logged values with successive
+ * differences, sort the n - 1 deltas and report the middle one.
+ * NOTE(review): with count <= 1 there are no deltas, and map[0]
+ * (an undifferenced value) is what gets converted -- confirm this
+ * is acceptable.
+ */
+ map = gem_mmap__device_coherent(i915, ts.handle, 0, 4096, PROT_WRITE);
+ for (n = 1; n < min(count, 512); n++)
+ map[n - 1] = map[n] - map[n - 1];
+ qsort(map, --n, sizeof(*map), cmp_u32);
+ *out = ticks_to_ns(i915, map[n / 2]);
+ munmap(map, 4096);
+
+ gem_close(i915, ts.handle);
+}
+
+/*
+ * qsort() comparator ordering unsigned long elements ascending.
+ * Returns -1, 0 or 1.
+ */
+static int cmp_ul(const void *A, const void *B)
+{
+	const unsigned long lhs = *(const unsigned long *)A;
+	const unsigned long rhs = *(const unsigned long *)B;
+
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+/*
+ * Fork 1, 3, 7 and then 15 clients that each run fair_child() on engine
+ * @e, and print the distribution (range, inter-quartile range, median,
+ * mean and deviation) of the per-client results.
+ *
+ * @timeout: multiplied by NSEC_PER_SEC and divided by the frame time to
+ *           get the number of frames the parent ticks before raising
+ *           the stop flag
+ * @flags: F_PACING / F_EXTERNAL, passed through to fair_child()
+ *
+ * result[] is a shared anonymous mapping: slots [0, nchild) receive each
+ * child's result in nanoseconds, slot [nchild] is the stop flag the
+ * children poll.
+ *
+ * Requires gen8+ and mutable submission on the engine class.
+ */
+static void fairness(int i915,
+		     const struct intel_execution_engine2 *e,
+		     int timeout, unsigned int flags)
+{
+	const int frame_ns = 16666 * 1000;
+	unsigned long *result;
+
+	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+	igt_require(gem_class_has_mutable_submission(i915, e->class));
+
+	/* The parent reads the results back, so map readable as well */
+	result = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+		      MAP_SHARED | MAP_ANON, -1, 0);
+	igt_assert(result != MAP_FAILED);
+
+	for (int n = 2; n <= 16; n <<= 1) {
+		int timeline = sw_sync_timeline_create();
+		int nframes = timeout * NSEC_PER_SEC / frame_ns + 1;
+		const int nchild = n - 1; /* odd for easy medians */
+		const int lo = nchild / 4;
+		const int hi = (3 * nchild + 3) / 4 - 1;
+		struct igt_mean m;
+
+		memset(result, 0, (nchild + 1) * sizeof(result[0]));
+		igt_fork(child, nchild) {
+			uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+			/* Each client gets an equal share of the frame */
+			fair_child(i915, ctx, e, frame_ns / nchild,
+				   timeout, timeline, flags,
+				   &result[nchild],
+				   &result[child]);
+
+			gem_context_destroy(i915, ctx);
+		}
+
+		/* Advance the sw_sync timeline once per frame */
+		while (nframes--) {
+			struct timespec tv = { .tv_nsec = frame_ns };
+			nanosleep(&tv, NULL);
+			sw_sync_timeline_inc(timeline, 1);
+		}
+
+		/*
+		 * Raise the stop flag, but keep advancing the timeline until
+		 * every child has published a result, so that children gated
+		 * on a timeline fence can still finish.
+		 */
+		result[nchild] = 1;
+		for (int child = 0; child < nchild; child++) {
+			while (!READ_ONCE(result[child])) {
+				struct timespec tv = { .tv_nsec = frame_ns };
+				nanosleep(&tv, NULL);
+				sw_sync_timeline_inc(timeline, 1);
+			}
+		}
+		igt_waitchildren();
+		close(timeline);
+
+		igt_mean_init(&m);
+		for (int child = 0; child < nchild; child++)
+			igt_mean_add(&m, result[child]);
+
+		qsort(result, nchild, sizeof(*result), cmp_ul);
+		igt_info("%d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f, mean: %.1f ± %.2f ms\n",
+			 nchild,
+			 1e-6 * result[0], 1e-6 * result[nchild - 1],
+			 1e-6 * result[lo], 1e-6 * result[hi],
+			 1e-6 * result[nchild / 2],
+			 1e-6 * igt_mean_get(&m),
+			 1e-6 * sqrt(igt_mean_get_variance(&m)));
+
+#if 0
+		/* Mean within 10% of target: 0.9 * frame < mean < frame / 0.9 */
+		igt_assert(10 * igt_mean_get(&m) > 9 * frame_ns &&
+			   9 * igt_mean_get(&m) < 10 * frame_ns);
+
+		/* Variance [inter-quartile range] is less than 33% of median */
+		igt_assert(3 * (result[hi] - result[lo]) < result[nchild / 2]);
+#endif
+	}
+
+	munmap(result, 4096);
+}
+
#define test_each_engine(T, i915, e) \
igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
igt_dynamic_f("%s", e->name)
@@ -2589,6 +3000,13 @@ igt_main
test_each_engine_store("promotion", fd, e)
promotion(fd, e->flags);
+ test_each_engine_store("fair-none", fd, e)
+ fairness(fd, e, 2, 0);
+ test_each_engine_store("fair-pace", fd, e)
+ fairness(fd, e, 2, F_PACING);
+ test_each_engine_store("fair-sync", fd, e)
+ fairness(fd, e, 2, F_PACING | F_EXTERNAL);
+
igt_subtest_group {
igt_fixture {
igt_require(gem_scheduler_has_preemption(fd));
--
2.27.0.rc2
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply related [flat|nested] 4+ messages in thread* [igt-dev] ✓ Fi.CI.BAT: success for i915/gem_exec_schedule: Try to spot unfairness (rev4)
2020-06-02 0:26 [igt-dev] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness Chris Wilson
@ 2020-06-02 0:58 ` Patchwork
2020-06-02 9:18 ` [Intel-gfx] [igt-dev] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness Mika Kuoppala
1 sibling, 0 replies; 4+ messages in thread
From: Patchwork @ 2020-06-02 0:58 UTC (permalink / raw)
To: Chris Wilson; +Cc: igt-dev
== Series Details ==
Series: i915/gem_exec_schedule: Try to spot unfairness (rev4)
URL : https://patchwork.freedesktop.org/series/77887/
State : success
== Summary ==
CI Bug Log - changes from CI_DRM_8568 -> IGTPW_4634
====================================================
Summary
-------
**SUCCESS**
No regressions found.
External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4634/index.html
Changes
-------
No changes found
Participating hosts (51 -> 45)
------------------------------
Missing (6): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-byt-clapper fi-bdw-samus
Build changes
-------------
* CI: CI-20190529 -> None
* IGT: IGT_5690 -> IGTPW_4634
CI-20190529: 20190529
CI_DRM_8568: 124bafc80c3ce62fc61b8eabb2657c87424b999b @ git://anongit.freedesktop.org/gfx-ci/linux
IGTPW_4634: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4634/index.html
IGT_5690: bea881189520a9cccbb1c1cb454ac5b6fdaea40e @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
== Testlist changes ==
+igt@gem_exec_schedule@fair-none
+igt@gem_exec_schedule@fair-pace
+igt@gem_exec_schedule@fair-sync
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4634/index.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [Intel-gfx] [igt-dev] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
2020-06-02 0:26 [igt-dev] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness Chris Wilson
2020-06-02 0:58 ` [igt-dev] ✓ Fi.CI.BAT: success for i915/gem_exec_schedule: Try to spot unfairness (rev4) Patchwork
@ 2020-06-02 9:18 ` Mika Kuoppala
2020-06-02 9:23 ` [igt-dev] [Intel-gfx] " Chris Wilson
1 sibling, 1 reply; 4+ messages in thread
From: Mika Kuoppala @ 2020-06-02 9:18 UTC (permalink / raw)
To: Chris Wilson, intel-gfx; +Cc: igt-dev, Chris Wilson
Chris Wilson <chris@chris-wilson.co.uk> writes:
> An important property for multi-client systems is that each client gets
> a 'fair' allotment of system time. (Where fairness is at the whim of the
> context properties, such as priorities.) This test forks N independent
> clients (albeit they happen to share a single vm), and does an equal
> amount of work in client and asserts that they take an equal amount of
> time.
>
> Though we have never claimed to have a completely fair scheduler, that
> is what is expected.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Ramalingam C <ramalingam.c@intel.com>
> ---
> tests/i915/gem_exec_schedule.c | 418 +++++++++++++++++++++++++++++++++
> 1 file changed, 418 insertions(+)
>
> diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
> index 56c638833..d1121ecd2 100644
> --- a/tests/i915/gem_exec_schedule.c
> +++ b/tests/i915/gem_exec_schedule.c
> @@ -2495,6 +2495,417 @@ static void measure_semaphore_power(int i915)
> rapl_close(&pkg);
> }
>
> +static int read_timestamp_frequency(int i915)
> +{
> + int value = 0;
> + drm_i915_getparam_t gp = {
> + .value = &value,
> + .param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
> + };
> + ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
> + return value;
> +}
> +
> +static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
> +{
> + return (x + y - 1) / y;
> +}
> +
> +static uint64_t ns_to_ticks(int i915, uint64_t ns)
> +{
> + return div64_u64_round_up(ns * read_timestamp_frequency(i915),
> + NSEC_PER_SEC);
> +}
> +
> +static uint64_t ticks_to_ns(int i915, uint64_t ticks)
> +{
> + return div64_u64_round_up(ticks * NSEC_PER_SEC,
> + read_timestamp_frequency(i915));
> +}
> +
> +#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
> +
> +#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
> +#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
> +/* Opcodes for MI_MATH_INSTR */
> +#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
> +#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
> +#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
> +#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1)
> +#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1)
> +#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
> +#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
> +#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
> +#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
> +#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
> +#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
> +#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
> +/* Registers used as operands in MI_MATH_INSTR */
> +#define MI_MATH_REG(x) (x)
> +#define MI_MATH_REG_SRCA 0x20
> +#define MI_MATH_REG_SRCB 0x21
> +#define MI_MATH_REG_ACCU 0x31
> +#define MI_MATH_REG_ZF 0x32
> +#define MI_MATH_REG_CF 0x33
Are you thinking that we should just pull in the driver gpu_commands.h
as is into lib?
-Mika
> +
> +#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
> +
> +static void delay(int i915,
> + const struct intel_execution_engine2 *e,
> + uint32_t handle,
> + uint64_t addr,
> + uint64_t ns)
> +{
> + const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
> + const uint32_t base = gem_engine_mmio_base(i915, e->name);
> +#define CS_GPR(x) (base + 0x600 + 8 * (x))
> +#define TIMESTAMP (base + 0x3a8)
> + enum { START_TS, NOW_TS };
> + uint32_t *map, *cs, *jmp;
> +
> + igt_require(base);
> +
> + cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
> +
> + *cs++ = MI_LOAD_REGISTER_IMM;
> + *cs++ = CS_GPR(START_TS) + 4;
> + *cs++ = 0;
> + *cs++ = MI_LOAD_REGISTER_REG;
> + *cs++ = TIMESTAMP;
> + *cs++ = CS_GPR(START_TS);
> +
> + if (offset_in_page(cs) & 4)
> + *cs++ = 0;
> + jmp = cs;
> +
> + *cs++ = 0x5 << 23; /* MI_ARB_CHECK */
> +
> + *cs++ = MI_LOAD_REGISTER_IMM;
> + *cs++ = CS_GPR(NOW_TS) + 4;
> + *cs++ = 0;
> + *cs++ = MI_LOAD_REGISTER_REG;
> + *cs++ = TIMESTAMP;
> + *cs++ = CS_GPR(NOW_TS);
> +
> + *cs++ = MI_MATH(4);
> + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
> + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
> + *cs++ = MI_MATH_SUB;
> + *cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
> +
> + *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
> + *cs++ = CS_GPR(NOW_TS);
> + *cs++ = addr + 4000;
> + *cs++ = addr >> 32;
> +
> + *cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
> + *cs++ = ~ns_to_ticks(i915, ns);
> + *cs++ = addr + 4000;
> + *cs++ = addr >> 32;
> +
> + *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
> + *cs++ = addr + offset_in_page(jmp);
> + *cs++ = addr >> 32;
> +
> + munmap(map, 4096);
> +}
> +
> +static struct drm_i915_gem_exec_object2
> +delay_create(int i915, uint32_t ctx,
> + const struct intel_execution_engine2 *e,
> + uint64_t target_ns)
> +{
> + struct drm_i915_gem_exec_object2 obj = {
> + .handle = batch_create(i915),
> + .flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
> + };
> + struct drm_i915_gem_execbuffer2 execbuf = {
> + .buffers_ptr = to_user_pointer(&obj),
> + .buffer_count = 1,
> + .rsvd1 = ctx,
> + .flags = e->flags,
> + };
> +
> + gem_execbuf(i915, &execbuf);
> + gem_sync(i915, obj.handle);
> +
> + delay(i915, e, obj.handle, obj.offset, target_ns);
> +
> + obj.flags |= EXEC_OBJECT_PINNED;
> + return obj;
> +}
> +
> +static void tslog(int i915,
> + const struct intel_execution_engine2 *e,
> + uint32_t handle,
> + uint64_t addr)
> +{
> + const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
> + const uint32_t base = gem_engine_mmio_base(i915, e->name);
> +#define CS_GPR(x) (base + 0x600 + 8 * (x))
> +#define CS_TIMESTAMP (base + 0x358)
> + enum { ONE, MASK, ADDR };
> + uint32_t *timestamp_lo, *addr_lo;
> + uint32_t *map, *cs;
> +
> + igt_require(base);
> +
> + map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
> + cs = map + 512;
> +
> + *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
> + *cs++ = CS_TIMESTAMP;
> + timestamp_lo = cs;
> + *cs++ = addr;
> + *cs++ = addr >> 32;
> +
> + *cs++ = MI_LOAD_REGISTER_IMM;
> + *cs++ = CS_GPR(ADDR);
> + addr_lo = cs;
> + *cs++ = addr;
> + *cs++ = MI_LOAD_REGISTER_IMM;
> + *cs++ = CS_GPR(ADDR) + 4;
> + *cs++ = addr >> 32;
> +
> + *cs++ = MI_LOAD_REGISTER_IMM;
> + *cs++ = CS_GPR(ONE);
> + *cs++ = 4;
> + *cs++ = MI_LOAD_REGISTER_IMM;
> + *cs++ = CS_GPR(ONE) + 4;
> + *cs++ = 0;
> +
> + *cs++ = MI_LOAD_REGISTER_IMM;
> + *cs++ = CS_GPR(MASK);
> + *cs++ = 0xfffff7ff;
> + *cs++ = MI_LOAD_REGISTER_IMM;
> + *cs++ = CS_GPR(MASK) + 4;
> + *cs++ = 0xffffffff;
> +
> + *cs++ = MI_MATH(8);
> + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ONE));
> + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
> + *cs++ = MI_MATH_ADD;
> + *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
> + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
> + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
> + *cs++ = MI_MATH_AND;
> + *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
> +
> + *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
> + *cs++ = CS_GPR(ADDR);
> + *cs++ = addr + offset_in_page(timestamp_lo);
> + *cs++ = addr >> 32;
> + *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
> + *cs++ = CS_GPR(ADDR);
> + *cs++ = addr + offset_in_page(addr_lo);
> + *cs++ = addr >> 32;
> +
> + *cs++ = MI_BATCH_BUFFER_END;
> +
> + munmap(map, 4096);
> +}
> +
> +static struct drm_i915_gem_exec_object2
> +tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
> +{
> + struct drm_i915_gem_exec_object2 obj = {
> + .handle = batch_create(i915),
> + .flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
> + };
> + struct drm_i915_gem_execbuffer2 execbuf = {
> + .buffers_ptr = to_user_pointer(&obj),
> + .buffer_count = 1,
> + .rsvd1 = ctx,
> + .flags = e->flags,
> + };
> +
> + gem_execbuf(i915, &execbuf);
> + gem_sync(i915, obj.handle);
> +
> + tslog(i915, e, obj.handle, obj.offset);
> +
> + obj.flags |= EXEC_OBJECT_PINNED;
> + return obj;
> +}
> +
> +static int cmp_u32(const void *A, const void *B)
> +{
> + const unsigned long *a = A, *b = B;
> +
> + if (*a < *b)
> + return -1;
> + else if (*a > *b)
> + return 1;
> + else
> + return 0;
> +}
> +
> +static void fair_child(int i915, uint32_t ctx,
> + const struct intel_execution_engine2 *e,
> + uint64_t frame_ns,
> + int timeout,
> + int timeline,
> + unsigned int flags,
> + unsigned long *ctl,
> + unsigned long *out)
> +#define F_PACING 0x1
> +#define F_EXTERNAL 0x2
> +{
> + const int batches_per_frame = 3;
> + struct drm_i915_gem_exec_object2 prev =
> + delay_create(i915, ctx, e, frame_ns / batches_per_frame);
> + struct drm_i915_gem_exec_object2 next =
> + delay_create(i915, ctx, e, frame_ns / batches_per_frame);
> + struct drm_i915_gem_exec_object2 ts = tslog_create(i915, ctx, e);
> + struct timespec tv = {};
> + unsigned long count = 0;
> + int p_fence = -1, n_fence = -1;
> + uint32_t *map;
> + int n;
> +
> + igt_nsec_elapsed(&tv);
> + while (!READ_ONCE(*ctl)) {
> + struct drm_i915_gem_execbuffer2 execbuf = {
> + .buffers_ptr = to_user_pointer(&next),
> + .buffer_count = 1,
> + .rsvd1 = ctx,
> + .rsvd2 = -1,
> + .flags = e->flags,
> + };
> +
> + if (flags & F_EXTERNAL) {
> + execbuf.rsvd2 =
> + sw_sync_timeline_create_fence(timeline, count);
> + execbuf.flags |= I915_EXEC_FENCE_IN;
> + }
> +
> + execbuf.flags |= I915_EXEC_FENCE_OUT;
> + gem_execbuf_wr(i915, &execbuf);
> + n_fence = execbuf.rsvd2 >> 32;
> + execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
> + for (n = 1; n < batches_per_frame; n++)
> + gem_execbuf(i915, &execbuf);
> +
> + execbuf.buffers_ptr = to_user_pointer(&ts);
> + execbuf.batch_start_offset = 2048;
> + gem_execbuf(i915, &execbuf);
> +
> + if (flags & F_PACING && p_fence != -1) {
> + struct pollfd pfd = {
> + .fd = p_fence,
> + .events = POLLIN,
> + };
> + poll(&pfd, 1, -1);
> + }
> + close(p_fence);
> + close(execbuf.rsvd2);
> +
> + igt_swap(prev, next);
> + igt_swap(p_fence, n_fence);
> + count++;
> + }
> + gem_sync(i915, prev.handle);
> + close(p_fence);
> +
> + gem_close(i915, next.handle);
> + gem_close(i915, prev.handle);
> +
> + map = gem_mmap__device_coherent(i915, ts.handle, 0, 4096, PROT_WRITE);
> + for (n = 1; n < min(count, 512); n++)
> + map[n - 1] = map[n] - map[n - 1];
> + qsort(map, --n, sizeof(*map), cmp_u32);
> + *out = ticks_to_ns(i915, map[n / 2]);
> + munmap(map, 4096);
> +
> + gem_close(i915, ts.handle);
> +}
> +
> +static int cmp_ul(const void *A, const void *B)
> +{
> + const unsigned long *a = A, *b = B;
> +
> + if (*a < *b)
> + return -1;
> + else if (*a > *b)
> + return 1;
> + else
> + return 0;
> +}
> +
> +static void fairness(int i915,
> + const struct intel_execution_engine2 *e,
> + int timeout, unsigned int flags)
> +{
> + const int frame_ns = 16666 * 1000;
> + unsigned long *result;
> +
> + igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
> + igt_require(gem_class_has_mutable_submission(i915, e->class));
> +
> + result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
> +
> + for (int n = 2; n <= 16; n <<= 1) {
> + int timeline = sw_sync_timeline_create();
> + int nframes = timeout * NSEC_PER_SEC / frame_ns + 1;
> + const int nchild = n - 1; /* odd for easy medians */
> + const int lo = nchild / 4;
> + const int hi = (3 * nchild + 3) / 4 - 1;
> + struct igt_mean m;
> +
> + memset(result, 0, (nchild + 1) * sizeof(result[0]));
> + igt_fork(child, nchild) {
> + uint32_t ctx = gem_context_clone_with_engines(i915, 0);
> +
> + fair_child(i915, ctx, e, frame_ns / nchild,
> + timeout, timeline, flags,
> + &result[nchild],
> + &result[child]);
> +
> + gem_context_destroy(i915, ctx);
> + }
> +
> + while (nframes--) {
> + struct timespec tv = { .tv_nsec = frame_ns };
> + nanosleep(&tv, NULL);
> + sw_sync_timeline_inc(timeline, 1);
> + }
> + result[nchild] = 1;
> + for (int child = 0; child < nchild; child++) {
> + while (!READ_ONCE(result[child])) {
> + struct timespec tv = { .tv_nsec = frame_ns };
> + nanosleep(&tv, NULL);
> + sw_sync_timeline_inc(timeline, 1);
> + }
> + }
> + igt_waitchildren();
> + close(timeline);
> +
> + igt_mean_init(&m);
> + for (int child = 0; child < nchild; child++)
> + igt_mean_add(&m, result[child]);
> +
> + qsort(result, nchild, sizeof(*result), cmp_ul);
> + igt_info("%d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f, mean: %.1f ± %.2f ms\n",
> + nchild,
> + 1e-6 * result[0], 1e-6 * result[nchild - 1],
> + 1e-6 * result[lo], 1e-6 * result[hi],
> + 1e-6 * result[nchild / 2],
> + 1e-6 * igt_mean_get(&m),
> + 1e-6 * sqrt(igt_mean_get_variance(&m)));
> +
> +#if 0
> + /* Mean within 10% of target */
> + igt_assert( 9 * igt_mean_get(&m) > 10 * frame_ns &&
> + 10 * igt_mean_get(&m) < 9 * frame_ns);
> +
> + /* Variance [inter-quartile range] is less than 33% of median */
> + igt_assert(3 * result[hi] - result[lo] < result[nchild / 2]);
> +#endif
> + }
> +
> + munmap(result, 4096);
> +}
> +
> #define test_each_engine(T, i915, e) \
> igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
> igt_dynamic_f("%s", e->name)
> @@ -2589,6 +3000,13 @@ igt_main
> test_each_engine_store("promotion", fd, e)
> promotion(fd, e->flags);
>
> + test_each_engine_store("fair-none", fd, e)
> + fairness(fd, e, 2, 0);
> + test_each_engine_store("fair-pace", fd, e)
> + fairness(fd, e, 2, F_PACING);
> + test_each_engine_store("fair-sync", fd, e)
> + fairness(fd, e, 2, F_PACING | F_EXTERNAL);
> +
> igt_subtest_group {
> igt_fixture {
> igt_require(gem_scheduler_has_preemption(fd));
> --
> 2.27.0.rc2
>
> _______________________________________________
> igt-dev mailing list
> igt-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/igt-dev
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 4+ messages in thread* Re: [igt-dev] [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
2020-06-02 9:18 ` [Intel-gfx] [igt-dev] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness Mika Kuoppala
@ 2020-06-02 9:23 ` Chris Wilson
0 siblings, 0 replies; 4+ messages in thread
From: Chris Wilson @ 2020-06-02 9:23 UTC (permalink / raw)
To: Mika Kuoppala, intel-gfx; +Cc: igt-dev
Quoting Mika Kuoppala (2020-06-02 10:18:34)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
>
> > An important property for multi-client systems is that each client gets
> > a 'fair' allotment of system time. (Where fairness is at the whim of the
> > context properties, such as priorities.) This test forks N independent
> > clients (albeit they happen to share a single vm), and does an equal
> > amount of work in client and asserts that they take an equal amount of
> > time.
> >
> > Though we have never claimed to have a completely fair scheduler, that
> > is what is expected.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Ramalingam C <ramalingam.c@intel.com>
> > ---
> > tests/i915/gem_exec_schedule.c | 418 +++++++++++++++++++++++++++++++++
> > 1 file changed, 418 insertions(+)
> >
> > diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
> > index 56c638833..d1121ecd2 100644
> > --- a/tests/i915/gem_exec_schedule.c
> > +++ b/tests/i915/gem_exec_schedule.c
> > @@ -2495,6 +2495,417 @@ static void measure_semaphore_power(int i915)
> > rapl_close(&pkg);
> > }
> >
> > +static int read_timestamp_frequency(int i915)
> > +{
> > + int value = 0;
> > + drm_i915_getparam_t gp = {
> > + .value = &value,
> > + .param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
> > + };
> > + ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
> > + return value;
> > +}
> > +
> > +static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
> > +{
> > + return (x + y - 1) / y;
> > +}
> > +
> > +static uint64_t ns_to_ticks(int i915, uint64_t ns)
> > +{
> > + return div64_u64_round_up(ns * read_timestamp_frequency(i915),
> > + NSEC_PER_SEC);
> > +}
> > +
> > +static uint64_t ticks_to_ns(int i915, uint64_t ticks)
> > +{
> > + return div64_u64_round_up(ticks * NSEC_PER_SEC,
> > + read_timestamp_frequency(i915));
> > +}
> > +
> > +#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
> > +
> > +#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
> > +#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
> > +/* Opcodes for MI_MATH_INSTR */
> > +#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
> > +#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
> > +#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
> > +#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1)
> > +#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1)
> > +#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
> > +#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
> > +#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
> > +#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
> > +#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
> > +#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
> > +#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
> > +/* Registers used as operands in MI_MATH_INSTR */
> > +#define MI_MATH_REG(x) (x)
> > +#define MI_MATH_REG_SRCA 0x20
> > +#define MI_MATH_REG_SRCB 0x21
> > +#define MI_MATH_REG_ACCU 0x31
> > +#define MI_MATH_REG_ZF 0x32
> > +#define MI_MATH_REG_CF 0x33
>
> Are you thinking that we should just pull in the driver gpu_commands.h
> as is into lib?
Yes. We should at least share the header for mi commands between the
kernel and igt.
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2020-06-02 9:23 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-06-02 0:26 [igt-dev] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness Chris Wilson
2020-06-02 0:58 ` [igt-dev] ✓ Fi.CI.BAT: success for i915/gem_exec_schedule: Try to spot unfairness (rev4) Patchwork
2020-06-02 9:18 ` [Intel-gfx] [igt-dev] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness Mika Kuoppala
2020-06-02 9:23 ` [igt-dev] [Intel-gfx] " Chris Wilson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox