* [igt-dev] [PATCH i-g-t 1/2] i915/gem_exec_latency: Measure the latency of context switching
@ 2019-06-04 8:13 Chris Wilson
2019-06-04 8:13 ` [igt-dev] [PATCH i-g-t 2/2] i915/gem_exec_latency: Add another variant of waiter latency Chris Wilson
` (3 more replies)
0 siblings, 4 replies; 6+ messages in thread
From: Chris Wilson @ 2019-06-04 8:13 UTC (permalink / raw)
To: intel-gfx; +Cc: igt-dev
Measure the baseline latency between contexts in order to directly
compare that with the additional cost of preemption.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
tests/i915/gem_exec_latency.c | 230 ++++++++++++++++++++++++++++++++++
1 file changed, 230 insertions(+)
diff --git a/tests/i915/gem_exec_latency.c b/tests/i915/gem_exec_latency.c
index e56d62780..e88fbbc6a 100644
--- a/tests/i915/gem_exec_latency.c
+++ b/tests/i915/gem_exec_latency.c
@@ -410,6 +410,86 @@ static void latency_from_ring(int fd,
}
}
+static void execution_latency(int i915, unsigned int ring, const char *name)
+{
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = gem_create(i915, 4095),
+ };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ .flags = ring | LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT,
+ };
+ const unsigned int mmio_base = 0x2000;
+ const unsigned int cs_timestamp = mmio_base + 0x358;
+ volatile uint32_t *timestamp;
+ uint32_t *cs, *result;
+
+ timestamp =
+ (volatile uint32_t *)((volatile char *)igt_global_mmio + cs_timestamp);
+
+ obj.handle = gem_create(i915, 4096);
+ obj.flags = EXEC_OBJECT_PINNED;
+ result = gem_mmap__wc(i915, obj.handle, 0, 4096, PROT_WRITE);
+
+ for (int i = 0; i < 16; i++) {
+ cs = result + 16 * i;
+ *cs++ = 0x24 << 23 | 2; /* SRM */
+ *cs++ = cs_timestamp;
+ *cs++ = 4096 - 16 * 4 + i * 4;
+ *cs++ = 0;
+ *cs++ = 0xa << 23;
+ }
+
+ cs = result + 1024 - 16;
+
+ for (int length = 2; length <= 16; length <<= 1) {
+ struct igt_mean submit, batch, total;
+ int last = length - 1;
+
+ igt_mean_init(&submit);
+ igt_mean_init(&batch);
+ igt_mean_init(&total);
+
+ igt_until_timeout(2) {
+ uint32_t now, end;
+
+ cs[last] = 0;
+
+ now = *timestamp;
+ for (int i = 0; i < length; i++) {
+ execbuf.batch_start_offset = 64 * i;
+ gem_execbuf(i915, &execbuf);
+ }
+ while (!((volatile uint32_t *)cs)[last])
+ ;
+ end = *timestamp;
+
+ igt_mean_add(&submit, (cs[0] - now) * rcs_clock);
+ igt_mean_add(&batch, (cs[last] - cs[0]) * rcs_clock / last);
+ igt_mean_add(&total, (end - now) * rcs_clock);
+ }
+
+ igt_info("%sx%d Submission latency: %.2f±%.2fus\n",
+ name, length,
+ 1e-3 * igt_mean_get(&submit),
+ 1e-3 * sqrt(igt_mean_get_variance(&submit)));
+
+ igt_info("%sx%d Inter-batch latency: %.2f±%.2fus\n",
+ name, length,
+ 1e-3 * igt_mean_get(&batch),
+ 1e-3 * sqrt(igt_mean_get_variance(&batch)));
+
+ igt_info("%sx%d End-to-end latency: %.2f±%.2fus\n",
+ name, length,
+ 1e-3 * igt_mean_get(&total),
+ 1e-3 * sqrt(igt_mean_get_variance(&total)));
+ }
+
+ munmap(result, 4096);
+ gem_close(i915, obj.handle);
+}
+
static void
__submit_spin(int fd, igt_spin_t *spin, unsigned int flags)
{
@@ -616,6 +696,142 @@ rthog_latency_on_ring(int fd, unsigned int engine, const char *name, unsigned in
munmap(results, MMAP_SZ);
}
+static void context_switch(int i915,
+ unsigned int engine, const char *name,
+ unsigned int flags)
+{
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_relocation_entry reloc[5];
+ struct drm_i915_gem_execbuffer2 eb;
+ uint32_t *cs, *bbe, *results, v;
+ unsigned int mmio_base;
+ struct igt_mean mean;
+ uint32_t ctx[2];
+
+ /* XXX i915_query()! */
+ switch (engine) {
+ case I915_EXEC_DEFAULT:
+ case I915_EXEC_RENDER:
+ mmio_base = 0x2000;
+ break;
+#if 0
+ case I915_EXEC_BSD:
+ mmio_base = 0x12000;
+ break;
+#endif
+ case I915_EXEC_BLT:
+ mmio_base = 0x22000;
+ break;
+
+ case I915_EXEC_VEBOX:
+ if (intel_gen(intel_get_drm_devid(i915)) >= 11)
+ mmio_base = 0x1d8000;
+ else
+ mmio_base = 0x1a000;
+ break;
+
+ default:
+ igt_skip("mmio base not known\n");
+ }
+
+ for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+ ctx[i] = gem_context_create(i915);
+
+ if (flags & PREEMPT) {
+ gem_context_set_priority(i915, ctx[0], -1023);
+ gem_context_set_priority(i915, ctx[1], +1023);
+ }
+
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = gem_create(i915, 4096);
+ gem_set_caching(i915, obj[0].handle, 1);
+ results = gem_mmap__cpu(i915, obj[0].handle, 0, 4096, PROT_READ);
+ gem_set_domain(i915, obj[0].handle, I915_GEM_DOMAIN_CPU, 0);
+
+ obj[1].handle = gem_create(i915, 4096);
+ memset(reloc,0, sizeof(reloc));
+ obj[1].relocation_count = ARRAY_SIZE(reloc);
+ obj[1].relocs_ptr = to_user_pointer(reloc);
+ bbe = gem_mmap__wc(i915, obj[1].handle, 0, 4096, PROT_WRITE);
+ gem_set_domain(i915, obj[1].handle,
+ I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
+
+ cs = bbe;
+ *cs++ = 0x5 << 23;
+ *cs++ = 0x24 << 23 | 2; /* SRM */
+ *cs++ = mmio_base + 0x358; /* TIMESTAMP */
+ reloc[0].target_handle = obj[0].handle;
+ reloc[0].offset = (cs - bbe) * sizeof(*cs);
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = MI_BATCH_BUFFER_START | 1;
+ reloc[1].target_handle = obj[1].handle;
+ reloc[1].offset = (cs - bbe) * sizeof(*cs);
+ *cs++ = 0;
+ *cs++ = 0;
+
+ cs = bbe + 64;
+ *cs++ = 0x24 << 23 | 2; /* SRM */
+ *cs++ = mmio_base + 0x358; /* TIMESTAMP */
+ reloc[2].target_handle = obj[0].handle;
+ reloc[2].offset = (cs - bbe) * sizeof(*cs);
+ *cs++ = reloc[2].delta = 4;
+ *cs++ = 0;
+ *cs++ = 0x29 << 23 | 2; /* LRM */
+ *cs++ = mmio_base + 0x600; /* GPR0 */
+ reloc[3].target_handle = obj[0].handle;
+ reloc[3].offset = (cs - bbe) * sizeof(*cs);
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0x24 << 23 | 2; /* SRM */
+ *cs++ = mmio_base + 0x600; /* GPR0 */
+ reloc[4].target_handle = obj[0].handle;
+ reloc[4].offset = (cs - bbe) * sizeof(*cs);
+ *cs++ = reloc[4].delta = 8;
+ *cs++ = 0;
+ *cs++ = 0xa << 23;
+
+ memset(&eb, 0, sizeof(eb));
+ eb.buffers_ptr = to_user_pointer(obj);
+ eb.buffer_count = ARRAY_SIZE(obj);
+ eb.flags = engine;
+ eb.flags |= LOCAL_I915_EXEC_NO_RELOC;
+
+ v = 0;
+ igt_mean_init(&mean);
+ igt_until_timeout(5) {
+ eb.rsvd1 = ctx[0];
+ eb.batch_start_offset = 0;
+ gem_execbuf(i915, &eb);
+
+ while (results[0] == v)
+ igt_assert(gem_bo_busy(i915, obj[1].handle));
+
+ eb.rsvd1 = ctx[1];
+ eb.batch_start_offset = 64 * sizeof(*cs);
+ gem_execbuf(i915, &eb);
+
+ *bbe = 0xa << 23;
+ gem_sync(i915, obj[1].handle);
+ *bbe = 0x5 << 23;
+
+ v = results[0];
+ igt_mean_add(&mean, (results[1] - results[2]) * rcs_clock);
+ }
+ igt_info("%s context switch latency%s: %.2f±%.2fus\n",
+ name, flags & PREEMPT ? " (preempt)" : "",
+ 1e-3 * igt_mean_get(&mean),
+ 1e-3 * sqrt(igt_mean_get_variance(&mean)));
+ munmap(results, 4096);
+ munmap(bbe, 4096);
+
+ for (int i = 0; i < ARRAY_SIZE(obj); i++)
+ gem_close(i915, obj[i].handle);
+
+ for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+ gem_context_destroy(i915, ctx[i]);
+}
+
static double clockrate(int i915, int reg)
{
volatile uint32_t *mmio;
@@ -722,6 +938,11 @@ igt_main
e->name,
0);
+ igt_subtest_f("%s-execution-latency", e->name)
+ execution_latency(device,
+ e->exec_id | e->flags,
+ e->name);
+
igt_subtest_f("%s-live-dispatch-queued", e->name)
latency_on_ring(device,
e->exec_id | e->flags,
@@ -741,12 +962,21 @@ igt_main
e->exec_id | e->flags,
e->name, CORK);
+ igt_subtest_f("%s-cs", e->name)
+ context_switch(device,
+ e->exec_id | e->flags,
+ e->name, 0);
igt_subtest_group {
igt_fixture {
gem_require_contexts(device);
igt_require(gem_scheduler_has_preemption(device));
}
+ igt_subtest_f("%s-cs-preempt", e->name)
+ context_switch(device,
+ e->exec_id | e->flags,
+ e->name, PREEMPT);
+
igt_subtest_f("%s-preemption", e->name)
latency_from_ring(device,
e->exec_id | e->flags,
--
2.20.1
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply related [flat|nested] 6+ messages in thread* [igt-dev] [PATCH i-g-t 2/2] i915/gem_exec_latency: Add another variant of waiter latency 2019-06-04 8:13 [igt-dev] [PATCH i-g-t 1/2] i915/gem_exec_latency: Measure the latency of context switching Chris Wilson @ 2019-06-04 8:13 ` Chris Wilson 2019-06-04 10:11 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2] i915/gem_exec_latency: Measure the latency of context switching Patchwork ` (2 subsequent siblings) 3 siblings, 0 replies; 6+ messages in thread From: Chris Wilson @ 2019-06-04 8:13 UTC (permalink / raw) To: intel-gfx; +Cc: igt-dev Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- tests/i915/gem_exec_latency.c | 81 +++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/tests/i915/gem_exec_latency.c b/tests/i915/gem_exec_latency.c index e88fbbc6a..fd4ceb4d6 100644 --- a/tests/i915/gem_exec_latency.c +++ b/tests/i915/gem_exec_latency.c @@ -490,6 +490,83 @@ static void execution_latency(int i915, unsigned int ring, const char *name) gem_close(i915, obj.handle); } +static void wakeup_latency(int i915, unsigned int ring, const char *name) +{ + struct drm_i915_gem_exec_object2 obj = { + .handle = gem_create(i915, 4095), + }; + struct drm_i915_gem_execbuffer2 execbuf = { + .buffers_ptr = to_user_pointer(&obj), + .buffer_count = 1, + .flags = ring | LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT, + }; + const unsigned int mmio_base = 0x2000; + const unsigned int cs_timestamp = mmio_base + 0x358; + volatile uint32_t *timestamp; + struct igt_mean wakeup; + uint32_t *cs, *result; + + timestamp = + (volatile uint32_t *)((volatile char *)igt_global_mmio + cs_timestamp); + + obj.handle = gem_create(i915, 4096); + obj.flags = EXEC_OBJECT_PINNED; + result = gem_mmap__wc(i915, obj.handle, 0, 4096, PROT_WRITE); + + cs = result; + + *cs++ = 0x24 << 23 | 2; /* SRM */ + *cs++ = cs_timestamp; + *cs++ = 4096 - 16 * 4; + *cs++ = 0; + + *cs++ = MI_BATCH_BUFFER_START | 1; + *cs++ = 0; + *cs++ = 0; + + *cs++ = 0x24 << 23 | 2; /* SRM */ + *cs++ = cs_timestamp; + *cs++ = 4096 - 16 * 4 + 4; + *cs++ = 0; + *cs++ = 0xa << 23; + + cs = result + 1024 - 16; + + { + struct sched_param p = { .sched_priority = 99 }; + sched_setscheduler(0, SCHED_FIFO | SCHED_RESET_ON_FORK, &p); + } + + igt_mean_init(&wakeup); + igt_until_timeout(2) { + uint32_t end; + + igt_fork(child, 1) { + result[4] = MI_BATCH_BUFFER_START | 1; + cs[0] = 0; + + gem_execbuf(i915, &execbuf); + + while (!cs[0]) + ; + result[4] = 0; + __sync_synchronize(); + } + gem_sync(i915, obj.handle); + end = *timestamp; + + igt_mean_add(&wakeup, (end - cs[1]) * rcs_clock); + igt_waitchildren(); + } + igt_info("%s Wakeup latency: %.2f±%.2fms [%.2f, %.2f]\n", name, + 1e-6 * igt_mean_get(&wakeup), + 1e-6 * sqrt(igt_mean_get_variance(&wakeup)), + 1e-6 * wakeup.min, 1e-6 * wakeup.max); + + munmap(result, 4096); + gem_close(i915, obj.handle); +} + static void __submit_spin(int fd, igt_spin_t *spin, unsigned int flags) { @@ -942,6 +1019,10 @@ igt_main execution_latency(device, e->exec_id | e->flags, e->name); + igt_subtest_f("%s-wakeup-latency", e->name) + wakeup_latency(device, + e->exec_id | e->flags, + e->name); igt_subtest_f("%s-live-dispatch-queued", e->name) latency_on_ring(device, -- 2.20.1 _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply related [flat|nested] 6+ messages in thread
* [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2] i915/gem_exec_latency: Measure the latency of context switching 2019-06-04 8:13 [igt-dev] [PATCH i-g-t 1/2] i915/gem_exec_latency: Measure the latency of context switching Chris Wilson 2019-06-04 8:13 ` [igt-dev] [PATCH i-g-t 2/2] i915/gem_exec_latency: Add another variant of waiter latency Chris Wilson @ 2019-06-04 10:11 ` Patchwork 2019-06-04 16:26 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork 2019-06-05 15:04 ` [Intel-gfx] [PATCH i-g-t 1/2] " Mika Kuoppala 3 siblings, 0 replies; 6+ messages in thread From: Patchwork @ 2019-06-04 10:11 UTC (permalink / raw) To: Chris Wilson; +Cc: igt-dev == Series Details == Series: series starting with [i-g-t,1/2] i915/gem_exec_latency: Measure the latency of context switching URL : https://patchwork.freedesktop.org/series/61566/ State : success == Summary == CI Bug Log - changes from CI_DRM_6183 -> IGTPW_3098 ==================================================== Summary ------- **SUCCESS** No regressions found. External URL: https://patchwork.freedesktop.org/api/1.0/series/61566/revisions/1/mbox/ Known issues ------------ Here are the changes found in IGTPW_3098 that come from known issues: ### IGT changes ### #### Issues hit #### * igt@kms_frontbuffer_tracking@basic: - fi-icl-u3: [PASS][1] -> [FAIL][2] ([fdo#103167]) [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/fi-icl-u3/igt@kms_frontbuffer_tracking@basic.html [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/fi-icl-u3/igt@kms_frontbuffer_tracking@basic.html #### Possible fixes #### * igt@gem_ctx_create@basic: - fi-icl-u3: [DMESG-WARN][3] ([fdo#107724]) -> [PASS][4] [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/fi-icl-u3/igt@gem_ctx_create@basic.html [4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/fi-icl-u3/igt@gem_ctx_create@basic.html * igt@i915_selftest@live_contexts: - fi-bdw-gvtdvm: [DMESG-FAIL][5] ([fdo#110235]) -> [PASS][6] [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/fi-bdw-gvtdvm/igt@i915_selftest@live_contexts.html [6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/fi-bdw-gvtdvm/igt@i915_selftest@live_contexts.html * igt@vgem_basic@dmabuf-export: - fi-icl-dsi: [INCOMPLETE][7] ([fdo#107713]) -> [PASS][8] [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/fi-icl-dsi/igt@vgem_basic@dmabuf-export.html [8]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/fi-icl-dsi/igt@vgem_basic@dmabuf-export.html {name}: This element is suppressed. This means it is ignored when computing the status of the difference (SUCCESS, WARNING, or FAILURE). [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167 [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713 [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724 [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569 [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271 [fdo#110235]: https://bugs.freedesktop.org/show_bug.cgi?id=110235 [fdo#110829]: https://bugs.freedesktop.org/show_bug.cgi?id=110829 Participating hosts (53 -> 46) ------------------------------ Missing (7): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-kbl-7560u fi-byt-clapper fi-bdw-samus Build changes ------------- * IGT: IGT_5033 -> IGTPW_3098 CI_DRM_6183: aff09dc14e1d9f03f9e6c8c157d4abccf4ca2b14 @ git://anongit.freedesktop.org/gfx-ci/linux IGTPW_3098: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/ IGT_5033: 8eddc43c75c2ae4ec6e3bee70e4f815e08a928c8 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools == Testlist changes == +igt@gem_exec_latency@blt-cs +igt@gem_exec_latency@blt-cs-preempt +igt@gem_exec_latency@blt-execution-latency +igt@gem_exec_latency@blt-wakeup-latency +igt@gem_exec_latency@bsd1-cs +igt@gem_exec_latency@bsd1-cs-preempt +igt@gem_exec_latency@bsd1-execution-latency +igt@gem_exec_latency@bsd1-wakeup-latency +igt@gem_exec_latency@bsd2-cs +igt@gem_exec_latency@bsd2-cs-preempt +igt@gem_exec_latency@bsd2-execution-latency +igt@gem_exec_latency@bsd2-wakeup-latency +igt@gem_exec_latency@bsd-cs +igt@gem_exec_latency@bsd-cs-preempt +igt@gem_exec_latency@bsd-execution-latency +igt@gem_exec_latency@bsd-wakeup-latency +igt@gem_exec_latency@render-cs +igt@gem_exec_latency@render-cs-preempt +igt@gem_exec_latency@render-execution-latency +igt@gem_exec_latency@render-wakeup-latency +igt@gem_exec_latency@vebox-cs +igt@gem_exec_latency@vebox-cs-preempt +igt@gem_exec_latency@vebox-execution-latency +igt@gem_exec_latency@vebox-wakeup-latency == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/ _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply [flat|nested] 6+ messages in thread
* [igt-dev] ✓ Fi.CI.IGT: success for series starting with [i-g-t,1/2] i915/gem_exec_latency: Measure the latency of context switching 2019-06-04 8:13 [igt-dev] [PATCH i-g-t 1/2] i915/gem_exec_latency: Measure the latency of context switching Chris Wilson 2019-06-04 8:13 ` [igt-dev] [PATCH i-g-t 2/2] i915/gem_exec_latency: Add another variant of waiter latency Chris Wilson 2019-06-04 10:11 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2] i915/gem_exec_latency: Measure the latency of context switching Patchwork @ 2019-06-04 16:26 ` Patchwork 2019-06-05 15:04 ` [Intel-gfx] [PATCH i-g-t 1/2] " Mika Kuoppala 3 siblings, 0 replies; 6+ messages in thread From: Patchwork @ 2019-06-04 16:26 UTC (permalink / raw) To: Chris Wilson; +Cc: igt-dev == Series Details == Series: series starting with [i-g-t,1/2] i915/gem_exec_latency: Measure the latency of context switching URL : https://patchwork.freedesktop.org/series/61566/ State : success == Summary == CI Bug Log - changes from CI_DRM_6183_full -> IGTPW_3098_full ==================================================== Summary ------- **SUCCESS** No regressions found. External URL: https://patchwork.freedesktop.org/api/1.0/series/61566/revisions/1/mbox/ Possible new issues ------------------- Here are the unknown changes that may have been introduced in IGTPW_3098_full: ### IGT changes ### #### Suppressed #### The following results come from untrusted machines, tests, or statuses. They do not affect the overall result. * {igt@gem_ctx_shared@exec-single-timeline-default}: - shard-iclb: NOTRUN -> [SKIP][1] [1]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-iclb4/igt@gem_ctx_shared@exec-single-timeline-default.html Known issues ------------ Here are the changes found in IGTPW_3098_full that come from known issues: ### IGT changes ### #### Issues hit #### * igt@gem_tiled_swapping@non-threaded: - shard-apl: [PASS][2] -> [FAIL][3] ([fdo#108686]) [2]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-apl5/igt@gem_tiled_swapping@non-threaded.html [3]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-apl2/igt@gem_tiled_swapping@non-threaded.html * igt@kms_cursor_crc@pipe-b-cursor-128x128-onscreen: - shard-kbl: [PASS][4] -> [FAIL][5] ([fdo#103232]) [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-kbl2/igt@kms_cursor_crc@pipe-b-cursor-128x128-onscreen.html [5]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-kbl6/igt@kms_cursor_crc@pipe-b-cursor-128x128-onscreen.html - shard-apl: [PASS][6] -> [FAIL][7] ([fdo#103232]) [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-apl2/igt@kms_cursor_crc@pipe-b-cursor-128x128-onscreen.html [7]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-apl6/igt@kms_cursor_crc@pipe-b-cursor-128x128-onscreen.html * igt@kms_cursor_legacy@2x-long-cursor-vs-flip-atomic: - shard-hsw: [PASS][8] -> [FAIL][9] ([fdo#105767]) [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-hsw7/igt@kms_cursor_legacy@2x-long-cursor-vs-flip-atomic.html [9]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-hsw6/igt@kms_cursor_legacy@2x-long-cursor-vs-flip-atomic.html * igt@kms_flip@2x-flip-vs-suspend: - shard-hsw: [PASS][10] -> [SKIP][11] ([fdo#109271]) +4 similar issues [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-hsw6/igt@kms_flip@2x-flip-vs-suspend.html [11]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-hsw1/igt@kms_flip@2x-flip-vs-suspend.html * igt@kms_flip@dpms-vs-vblank-race: - shard-glk: [PASS][12] -> [FAIL][13] ([fdo#103060]) [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-glk7/igt@kms_flip@dpms-vs-vblank-race.html [13]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-glk1/igt@kms_flip@dpms-vs-vblank-race.html * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-pwrite: - shard-iclb: [PASS][14] -> [FAIL][15] ([fdo#103167]) +5 similar issues [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-iclb1/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-pwrite.html [15]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-iclb2/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-pwrite.html * igt@kms_psr@psr2_cursor_mmap_cpu: - shard-iclb: [PASS][16] -> [SKIP][17] ([fdo#109441]) +1 similar issue [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-iclb2/igt@kms_psr@psr2_cursor_mmap_cpu.html [17]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-iclb4/igt@kms_psr@psr2_cursor_mmap_cpu.html * igt@kms_setmode@basic: - shard-apl: [PASS][18] -> [FAIL][19] ([fdo#99912]) [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-apl7/igt@kms_setmode@basic.html [19]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-apl8/igt@kms_setmode@basic.html * igt@kms_vblank@pipe-c-ts-continuation-suspend: - shard-apl: [PASS][20] -> [DMESG-WARN][21] ([fdo#108566]) +5 similar issues [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-apl3/igt@kms_vblank@pipe-c-ts-continuation-suspend.html [21]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-apl4/igt@kms_vblank@pipe-c-ts-continuation-suspend.html #### Possible fixes #### * igt@gem_tiled_swapping@non-threaded: - shard-hsw: [FAIL][22] ([fdo#108686]) -> [PASS][23] [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-hsw8/igt@gem_tiled_swapping@non-threaded.html [23]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-hsw8/igt@gem_tiled_swapping@non-threaded.html - shard-kbl: [DMESG-WARN][24] ([fdo#108686]) -> [PASS][25] [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-kbl6/igt@gem_tiled_swapping@non-threaded.html [25]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-kbl2/igt@gem_tiled_swapping@non-threaded.html * igt@kms_atomic_transition@plane-toggle-modeset-transition: - shard-iclb: [INCOMPLETE][26] ([fdo#107713]) -> [PASS][27] [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-iclb7/igt@kms_atomic_transition@plane-toggle-modeset-transition.html [27]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-iclb7/igt@kms_atomic_transition@plane-toggle-modeset-transition.html * igt@kms_flip@2x-plain-flip: - shard-hsw: [SKIP][28] ([fdo#109271]) -> [PASS][29] +18 similar issues [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-hsw1/igt@kms_flip@2x-plain-flip.html [29]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-hsw2/igt@kms_flip@2x-plain-flip.html * igt@kms_frontbuffer_tracking@fbcpsr-1p-pri-indfb-multidraw: - shard-iclb: [FAIL][30] ([fdo#103167]) -> [PASS][31] +1 similar issue [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-iclb2/igt@kms_frontbuffer_tracking@fbcpsr-1p-pri-indfb-multidraw.html [31]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-iclb5/igt@kms_frontbuffer_tracking@fbcpsr-1p-pri-indfb-multidraw.html * igt@kms_psr@psr2_primary_mmap_gtt: - shard-iclb: [SKIP][32] ([fdo#109441]) -> [PASS][33] +2 similar issues [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-iclb6/igt@kms_psr@psr2_primary_mmap_gtt.html [33]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-iclb2/igt@kms_psr@psr2_primary_mmap_gtt.html * igt@kms_sysfs_edid_timing: - shard-hsw: [FAIL][34] ([fdo#100047]) -> [PASS][35] [34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-hsw1/igt@kms_sysfs_edid_timing.html [35]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-hsw2/igt@kms_sysfs_edid_timing.html * igt@perf_pmu@rc6: - shard-kbl: [SKIP][36] ([fdo#109271]) -> [PASS][37] [36]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-kbl2/igt@perf_pmu@rc6.html [37]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-kbl1/igt@perf_pmu@rc6.html * igt@perf_pmu@rc6-runtime-pm-long: - shard-kbl: [FAIL][38] ([fdo#105010]) -> [PASS][39] [38]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-kbl6/igt@perf_pmu@rc6-runtime-pm-long.html [39]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-kbl1/igt@perf_pmu@rc6-runtime-pm-long.html #### Warnings #### * igt@gem_mmap_gtt@forked-big-copy-odd: - shard-iclb: [INCOMPLETE][40] ([fdo#107713] / [fdo#109100]) -> [TIMEOUT][41] ([fdo#109673]) [40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-iclb4/igt@gem_mmap_gtt@forked-big-copy-odd.html [41]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-iclb8/igt@gem_mmap_gtt@forked-big-copy-odd.html {name}: This element is suppressed. This means it is ignored when computing the status of the difference (SUCCESS, WARNING, or FAILURE). [fdo#100047]: https://bugs.freedesktop.org/show_bug.cgi?id=100047 [fdo#103060]: https://bugs.freedesktop.org/show_bug.cgi?id=103060 [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167 [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232 [fdo#105010]: https://bugs.freedesktop.org/show_bug.cgi?id=105010 [fdo#105767]: https://bugs.freedesktop.org/show_bug.cgi?id=105767 [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713 [fdo#108566]: https://bugs.freedesktop.org/show_bug.cgi?id=108566 [fdo#108686]: https://bugs.freedesktop.org/show_bug.cgi?id=108686 [fdo#109100]: https://bugs.freedesktop.org/show_bug.cgi?id=109100 [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271 [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441 [fdo#109673]: https://bugs.freedesktop.org/show_bug.cgi?id=109673 [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912 Participating hosts (10 -> 6) ------------------------------ Missing (4): pig-skl-6260u shard-skl pig-hsw-4770r pig-glk-j5005 Build changes ------------- * IGT: IGT_5033 -> IGTPW_3098 * Piglit: piglit_4509 -> None CI_DRM_6183: aff09dc14e1d9f03f9e6c8c157d4abccf4ca2b14 @ git://anongit.freedesktop.org/gfx-ci/linux IGTPW_3098: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/ IGT_5033: 8eddc43c75c2ae4ec6e3bee70e4f815e08a928c8 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/ _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Intel-gfx] [PATCH i-g-t 1/2] i915/gem_exec_latency: Measure the latency of context switching 2019-06-04 8:13 [igt-dev] [PATCH i-g-t 1/2] i915/gem_exec_latency: Measure the latency of context switching Chris Wilson ` (2 preceding siblings ...) 2019-06-04 16:26 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork @ 2019-06-05 15:04 ` Mika Kuoppala 2019-06-06 16:16 ` [igt-dev] " Chris Wilson 3 siblings, 1 reply; 6+ messages in thread From: Mika Kuoppala @ 2019-06-05 15:04 UTC (permalink / raw) To: Chris Wilson, intel-gfx; +Cc: igt-dev Chris Wilson <chris@chris-wilson.co.uk> writes: > Measure the baseline latency between contexts in order to directly > compare that with the additional cost of preemption. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > tests/i915/gem_exec_latency.c | 230 ++++++++++++++++++++++++++++++++++ > 1 file changed, 230 insertions(+) > > diff --git a/tests/i915/gem_exec_latency.c b/tests/i915/gem_exec_latency.c > index e56d62780..e88fbbc6a 100644 > --- a/tests/i915/gem_exec_latency.c > +++ b/tests/i915/gem_exec_latency.c > @@ -410,6 +410,86 @@ static void latency_from_ring(int fd, > } > } > > +static void execution_latency(int i915, unsigned int ring, const char *name) > +{ > + struct drm_i915_gem_exec_object2 obj = { > + .handle = gem_create(i915, 4095), > + }; > + struct drm_i915_gem_execbuffer2 execbuf = { > + .buffers_ptr = to_user_pointer(&obj), > + .buffer_count = 1, > + .flags = ring | LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT, > + }; > + const unsigned int mmio_base = 0x2000; > + const unsigned int cs_timestamp = mmio_base + 0x358; > + volatile uint32_t *timestamp; > + uint32_t *cs, *result; > + > + timestamp = > + (volatile uint32_t *)((volatile char *)igt_global_mmio + cs_timestamp); > + > + obj.handle = gem_create(i915, 4096); > + obj.flags = EXEC_OBJECT_PINNED; > + result = gem_mmap__wc(i915, obj.handle, 0, 4096, PROT_WRITE); > + > + for (int i = 0; i < 16; i++) { > + cs = result + 16 * i; > + *cs++ = 0x24 << 23 | 2; /* SRM */ > + *cs++ = cs_timestamp; > + *cs++ = 4096 - 16 * 4 + i * 4; > + *cs++ = 0; > + *cs++ = 0xa << 23; Why not MI_BATCH_BUFFER_END? To emphasize that we have multiple batches inside a bo? > + } > + > + cs = result + 1024 - 16; > + > + for (int length = 2; length <= 16; length <<= 1) { > + struct igt_mean submit, batch, total; > + int last = length - 1; > + > + igt_mean_init(&submit); > + igt_mean_init(&batch); > + igt_mean_init(&total); > + > + igt_until_timeout(2) { > + uint32_t now, end; > + > + cs[last] = 0; > + > + now = *timestamp; > + for (int i = 0; i < length; i++) { > + execbuf.batch_start_offset = 64 * i; > + gem_execbuf(i915, &execbuf); > + } > + while (!((volatile uint32_t *)cs)[last]) > + ; > + end = *timestamp; > + > + igt_mean_add(&submit, (cs[0] - now) * rcs_clock); > + igt_mean_add(&batch, (cs[last] - cs[0]) * rcs_clock / last); Just curious of what do you use of inter batch latency? Oh and do we need to to take the rcs_clock resolution into account on result calculation. Prolly not as it seems to be ticking fast enough for 0.1us accuracy. > + igt_mean_add(&total, (end - now) * rcs_clock); > + } > + > + igt_info("%sx%d Submission latency: %.2f±%.2fus\n", > + name, length, > + 1e-3 * igt_mean_get(&submit), > + 1e-3 * sqrt(igt_mean_get_variance(&submit))); > + > + igt_info("%sx%d Inter-batch latency: %.2f±%.2fus\n", > + name, length, > + 1e-3 * igt_mean_get(&batch), > + 1e-3 * sqrt(igt_mean_get_variance(&batch))); > + > + igt_info("%sx%d End-to-end latency: %.2f±%.2fus\n", > + name, length, > + 1e-3 * igt_mean_get(&total), > + 1e-3 * sqrt(igt_mean_get_variance(&total))); > + } > + > + munmap(result, 4096); > + gem_close(i915, obj.handle); > +} > + > static void > __submit_spin(int fd, igt_spin_t *spin, unsigned int flags) > { > @@ -616,6 +696,142 @@ rthog_latency_on_ring(int fd, unsigned int engine, const char *name, unsigned in > munmap(results, MMAP_SZ); > } > > +static void context_switch(int i915, > + unsigned int engine, const char *name, > + unsigned int flags) > +{ > + struct drm_i915_gem_exec_object2 obj[2]; > + struct drm_i915_gem_relocation_entry reloc[5]; > + struct drm_i915_gem_execbuffer2 eb; > + uint32_t *cs, *bbe, *results, v; > + unsigned int mmio_base; > + struct igt_mean mean; > + uint32_t ctx[2]; > + > + /* XXX i915_query()! */ > + switch (engine) { > + case I915_EXEC_DEFAULT: > + case I915_EXEC_RENDER: > + mmio_base = 0x2000; > + break; > +#if 0 > + case I915_EXEC_BSD: > + mmio_base = 0x12000; > + break; > +#endif It seems to be there according to bspec. Timestamps don't work with it? > + case I915_EXEC_BLT: > + mmio_base = 0x22000; > + break; > + > + case I915_EXEC_VEBOX: > + if (intel_gen(intel_get_drm_devid(i915)) >= 11) > + mmio_base = 0x1d8000; > + else > + mmio_base = 0x1a000; > + break; > + > + default: > + igt_skip("mmio base not known\n"); > + } > + > + for (int i = 0; i < ARRAY_SIZE(ctx); i++) > + ctx[i] = gem_context_create(i915); > + > + if (flags & PREEMPT) { > + gem_context_set_priority(i915, ctx[0], -1023); > + gem_context_set_priority(i915, ctx[1], +1023); > + } > + > + memset(obj, 0, sizeof(obj)); > + obj[0].handle = gem_create(i915, 4096); > + gem_set_caching(i915, obj[0].handle, 1); > + results = gem_mmap__cpu(i915, obj[0].handle, 0, 4096, PROT_READ); > + gem_set_domain(i915, obj[0].handle, I915_GEM_DOMAIN_CPU, 0); > + > + obj[1].handle = gem_create(i915, 4096); > + memset(reloc,0, sizeof(reloc)); > + obj[1].relocation_count = ARRAY_SIZE(reloc); > + obj[1].relocs_ptr = to_user_pointer(reloc); > + bbe = gem_mmap__wc(i915, obj[1].handle, 0, 4096, PROT_WRITE); > + gem_set_domain(i915, obj[1].handle, > + I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC); > + > + cs = bbe; > + *cs++ = 0x5 << 23; /* ARB_CHK */ or even better MI_ARB_CHK; > + *cs++ = 0x24 << 23 | 2; /* SRM */ > + *cs++ = mmio_base + 0x358; /* TIMESTAMP */ > + reloc[0].target_handle = obj[0].handle; > + reloc[0].offset = (cs - bbe) * sizeof(*cs); > + *cs++ = 0; > + *cs++ = 0; > + *cs++ = MI_BATCH_BUFFER_START | 1; > + reloc[1].target_handle = obj[1].handle; > + reloc[1].offset = (cs - bbe) * sizeof(*cs); > + *cs++ = 0; > + *cs++ = 0; > + > + cs = bbe + 64; > + *cs++ = 0x24 << 23 | 2; /* SRM */ > + *cs++ = mmio_base + 0x358; /* TIMESTAMP */ > + reloc[2].target_handle = obj[0].handle; > + reloc[2].offset = (cs - bbe) * sizeof(*cs); > + *cs++ = reloc[2].delta = 4; > + *cs++ = 0; > + *cs++ = 0x29 << 23 | 2; /* LRM */ > + *cs++ = mmio_base + 0x600; /* GPR0 */ > + reloc[3].target_handle = obj[0].handle; > + reloc[3].offset = (cs - bbe) * sizeof(*cs); > + *cs++ = 0; > + *cs++ = 0; > + *cs++ = 0x24 << 23 | 2; /* SRM */ > + *cs++ = mmio_base + 0x600; /* GPR0 */ > + reloc[4].target_handle = obj[0].handle; > + reloc[4].offset = (cs - bbe) * sizeof(*cs); > + *cs++ = reloc[4].delta = 8; > + *cs++ = 0; > + *cs++ = 0xa << 23; > + > + memset(&eb, 0, sizeof(eb)); > + eb.buffers_ptr = to_user_pointer(obj); > + eb.buffer_count = ARRAY_SIZE(obj); > + eb.flags = engine; > + eb.flags |= LOCAL_I915_EXEC_NO_RELOC; > + > + v = 0; > + igt_mean_init(&mean); > + igt_until_timeout(5) { > + eb.rsvd1 = ctx[0]; > + eb.batch_start_offset = 0; > + gem_execbuf(i915, &eb); > + > + while (results[0] == v) > + igt_assert(gem_bo_busy(i915, obj[1].handle)); Is there a race there? Object will be busy short time after the result appears. > + > + eb.rsvd1 = ctx[1]; > + eb.batch_start_offset = 64 * sizeof(*cs); > + gem_execbuf(i915, &eb); > + > + *bbe = 0xa << 23; __sync_syncronize or are you relying on the corresponding syscalls? -Mika > + gem_sync(i915, obj[1].handle); > + *bbe = 0x5 << 23; > + > + v = results[0]; > + igt_mean_add(&mean, (results[1] - results[2]) * rcs_clock); > + } > + igt_info("%s context switch latency%s: %.2f±%.2fus\n", > + name, flags & PREEMPT ? " (preempt)" : "", > + 1e-3 * igt_mean_get(&mean), > + 1e-3 * sqrt(igt_mean_get_variance(&mean))); > + munmap(results, 4096); > + munmap(bbe, 4096); > + > + for (int i = 0; i < ARRAY_SIZE(obj); i++) > + gem_close(i915, obj[i].handle); > + > + for (int i = 0; i < ARRAY_SIZE(ctx); i++) > + gem_context_destroy(i915, ctx[i]); > +} > + > static double clockrate(int i915, int reg) > { > volatile uint32_t *mmio; > @@ -722,6 +938,11 @@ igt_main > e->name, > 0); > > + igt_subtest_f("%s-execution-latency", e->name) > + execution_latency(device, > + e->exec_id | e->flags, > + e->name); > + > igt_subtest_f("%s-live-dispatch-queued", e->name) > latency_on_ring(device, > e->exec_id | e->flags, > @@ -741,12 +962,21 @@ igt_main > e->exec_id | e->flags, > e->name, CORK); > > + igt_subtest_f("%s-cs", e->name) > + context_switch(device, > + e->exec_id | e->flags, > + e->name, 0); > igt_subtest_group { > igt_fixture { > gem_require_contexts(device); > igt_require(gem_scheduler_has_preemption(device)); > } > > + igt_subtest_f("%s-cs-preempt", e->name) > + context_switch(device, > + e->exec_id | e->flags, > + e->name, PREEMPT); > + > igt_subtest_f("%s-preemption", e->name) > latency_from_ring(device, > e->exec_id | e->flags, > -- > 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [igt-dev] [PATCH i-g-t 1/2] i915/gem_exec_latency: Measure the latency of context switching 2019-06-05 15:04 ` [Intel-gfx] [PATCH i-g-t 1/2] " Mika Kuoppala @ 2019-06-06 16:16 ` Chris Wilson 0 siblings, 0 replies; 6+ messages in thread From: Chris Wilson @ 2019-06-06 16:16 UTC (permalink / raw) To: Mika Kuoppala, intel-gfx; +Cc: igt-dev Quoting Mika Kuoppala (2019-06-05 16:04:09) > Chris Wilson <chris@chris-wilson.co.uk> writes: > > > Measure the baseline latency between contexts in order to directly > > compare that with the additional cost of preemption. > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > --- > > tests/i915/gem_exec_latency.c | 230 ++++++++++++++++++++++++++++++++++ > > 1 file changed, 230 insertions(+) > > > > diff --git a/tests/i915/gem_exec_latency.c b/tests/i915/gem_exec_latency.c > > index e56d62780..e88fbbc6a 100644 > > --- a/tests/i915/gem_exec_latency.c > > +++ b/tests/i915/gem_exec_latency.c > > @@ -410,6 +410,86 @@ static void latency_from_ring(int fd, > > } > > } > > > > +static void execution_latency(int i915, unsigned int ring, const char *name) > > +{ > > + struct drm_i915_gem_exec_object2 obj = { > > + .handle = gem_create(i915, 4095), > > + }; > > + struct drm_i915_gem_execbuffer2 execbuf = { > > + .buffers_ptr = to_user_pointer(&obj), > > + .buffer_count = 1, > > + .flags = ring | LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT, > > + }; > > + const unsigned int mmio_base = 0x2000; > > + const unsigned int cs_timestamp = mmio_base + 0x358; > > + volatile uint32_t *timestamp; > > + uint32_t *cs, *result; > > + > > + timestamp = > > + (volatile uint32_t *)((volatile char *)igt_global_mmio + cs_timestamp); > > + > > + obj.handle = gem_create(i915, 4096); > > + obj.flags = EXEC_OBJECT_PINNED; > > + result = gem_mmap__wc(i915, obj.handle, 0, 4096, PROT_WRITE); > > + > > + for (int i = 0; i < 16; i++) { > > + cs = result + 16 * i; > > + *cs++ = 0x24 << 23 | 2; /* SRM */ > > + *cs++ = cs_timestamp; > > + *cs++ = 4096 - 16 * 4 + i * 4; > > + *cs++ = 0; > > + *cs++ = 0xa << 23; > > Why not MI_BATCH_BUFFER_END? To emphasize that we have > multiple batches inside a bo? I wrote MI_BATCH_BUFFER_END... Oh, I see, no, I was just using the values for no other reason than habit. > > + } > > + > > + cs = result + 1024 - 16; > > + > > + for (int length = 2; length <= 16; length <<= 1) { > > + struct igt_mean submit, batch, total; > > + int last = length - 1; > > + > > + igt_mean_init(&submit); > > + igt_mean_init(&batch); > > + igt_mean_init(&total); > > + > > + igt_until_timeout(2) { > > + uint32_t now, end; > > + > > + cs[last] = 0; > > + > > + now = *timestamp; > > + for (int i = 0; i < length; i++) { > > + execbuf.batch_start_offset = 64 * i; > > + gem_execbuf(i915, &execbuf); > > + } > > + while (!((volatile uint32_t *)cs)[last]) > > + ; > > + end = *timestamp; > > + > > + igt_mean_add(&submit, (cs[0] - now) * rcs_clock); > > + igt_mean_add(&batch, (cs[last] - cs[0]) * rcs_clock / last); > > Just curious of what do you use of inter batch latency? EPARSE. > Oh and do we need to to take the rcs_clock resolution into account > on result calculation. Prolly not as it seems to be ticking fast enough > for 0.1us accuracy. It just provides the lower granularity to results. It doesn't alter the calculations or presentation, just that quantum beneath which we cannot measure. Considering that the CS cannot go faster than its clock, it doesn't affect anything. > > + igt_mean_add(&total, (end - now) * rcs_clock); > > + } > > + > > + igt_info("%sx%d Submission latency: %.2f±%.2fus\n", > > + name, length, > > + 1e-3 * igt_mean_get(&submit), > > + 1e-3 * sqrt(igt_mean_get_variance(&submit))); > > + > > + igt_info("%sx%d Inter-batch latency: %.2f±%.2fus\n", > > + name, length, > > + 1e-3 * igt_mean_get(&batch), > > + 1e-3 * sqrt(igt_mean_get_variance(&batch))); > > + > > + igt_info("%sx%d End-to-end latency: %.2f±%.2fus\n", > > + name, length, > > + 1e-3 * igt_mean_get(&total), > > + 1e-3 * sqrt(igt_mean_get_variance(&total))); > > + } > > + > > + munmap(result, 4096); > > + gem_close(i915, obj.handle); > > +} > > + > > static void > > __submit_spin(int fd, igt_spin_t *spin, unsigned int flags) > > { > > @@ -616,6 +696,142 @@ rthog_latency_on_ring(int fd, unsigned int engine, const char *name, unsigned in > > munmap(results, MMAP_SZ); > > } > > > > +static void context_switch(int i915, > > + unsigned int engine, const char *name, > > + unsigned int flags) > > +{ > > + struct drm_i915_gem_exec_object2 obj[2]; > > + struct drm_i915_gem_relocation_entry reloc[5]; > > + struct drm_i915_gem_execbuffer2 eb; > > + uint32_t *cs, *bbe, *results, v; > > + unsigned int mmio_base; > > + struct igt_mean mean; > > + uint32_t ctx[2]; > > + > > + /* XXX i915_query()! */ > > + switch (engine) { > > + case I915_EXEC_DEFAULT: > > + case I915_EXEC_RENDER: > > + mmio_base = 0x2000; > > + break; > > +#if 0 > > + case I915_EXEC_BSD: > > + mmio_base = 0x12000; > > + break; > > +#endif > > It seems to be there according to bspec. Timestamps don't > work with it? Engine assignment is random. Old uapi is a mess. > > > + case I915_EXEC_BLT: > > + mmio_base = 0x22000; > > + break; > > + > > + case I915_EXEC_VEBOX: > > + if (intel_gen(intel_get_drm_devid(i915)) >= 11) > > + mmio_base = 0x1d8000; > > + else > > + mmio_base = 0x1a000; > > + break; > > + > > + default: > > + igt_skip("mmio base not known\n"); > > + } > > + > > + for (int i = 0; i < ARRAY_SIZE(ctx); i++) > > + ctx[i] = gem_context_create(i915); > > + > > + if (flags & PREEMPT) { > > + gem_context_set_priority(i915, ctx[0], -1023); > > + gem_context_set_priority(i915, ctx[1], +1023); > > + } > > + > > + memset(obj, 0, sizeof(obj)); > > + obj[0].handle = gem_create(i915, 4096); > > + gem_set_caching(i915, obj[0].handle, 1); > > + results = gem_mmap__cpu(i915, obj[0].handle, 0, 4096, PROT_READ); > > + gem_set_domain(i915, obj[0].handle, I915_GEM_DOMAIN_CPU, 0); > > + > > + obj[1].handle = gem_create(i915, 4096); > > + memset(reloc,0, sizeof(reloc)); > > + obj[1].relocation_count = ARRAY_SIZE(reloc); > > + obj[1].relocs_ptr = to_user_pointer(reloc); > > + bbe = gem_mmap__wc(i915, obj[1].handle, 0, 4096, PROT_WRITE); > > + gem_set_domain(i915, obj[1].handle, > > + I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC); > > + > > + cs = bbe; > > + *cs++ = 0x5 << 23; > > /* ARB_CHK */ > or even better MI_ARB_CHK; Who doesn't know these commands? > > > + *cs++ = 0x24 << 23 | 2; /* SRM */ > > + *cs++ = mmio_base + 0x358; /* TIMESTAMP */ > > + reloc[0].target_handle = obj[0].handle; > > + reloc[0].offset = (cs - bbe) * sizeof(*cs); > > + *cs++ = 0; > > + *cs++ = 0; > > + *cs++ = MI_BATCH_BUFFER_START | 1; > > + reloc[1].target_handle = obj[1].handle; > > + reloc[1].offset = (cs - bbe) * sizeof(*cs); > > + *cs++ = 0; > > + *cs++ = 0; > > + > > + cs = bbe + 64; > > + *cs++ = 0x24 << 23 | 2; /* SRM */ > > + *cs++ = mmio_base + 0x358; /* TIMESTAMP */ > > + reloc[2].target_handle = obj[0].handle; > > + reloc[2].offset = (cs - bbe) * sizeof(*cs); > > + *cs++ = reloc[2].delta = 4; > > + *cs++ = 0; > > + *cs++ = 0x29 << 23 | 2; /* LRM */ > > + *cs++ = mmio_base + 0x600; /* GPR0 */ > > + reloc[3].target_handle = obj[0].handle; > > + reloc[3].offset = (cs - bbe) * sizeof(*cs); > > + *cs++ = 0; > > + *cs++ = 0; > > + *cs++ = 0x24 << 23 | 2; /* SRM */ > > + *cs++ = mmio_base + 0x600; /* GPR0 */ > > + reloc[4].target_handle = obj[0].handle; > > + reloc[4].offset = (cs - bbe) * sizeof(*cs); > > + *cs++ = reloc[4].delta = 8; > > + *cs++ = 0; > > + *cs++ = 0xa << 23; > > + > > + memset(&eb, 0, sizeof(eb)); > > + eb.buffers_ptr = to_user_pointer(obj); > > + eb.buffer_count = ARRAY_SIZE(obj); > > + eb.flags = engine; > > + eb.flags |= LOCAL_I915_EXEC_NO_RELOC; > > + > > + v = 0; > > + igt_mean_init(&mean); > > + igt_until_timeout(5) { > > + eb.rsvd1 = ctx[0]; > > + eb.batch_start_offset = 0; > > + gem_execbuf(i915, &eb); > > + > > + while (results[0] == v) > > + igt_assert(gem_bo_busy(i915, obj[1].handle)); > > Is there a race there? Object will be busy short time > after the result appears. No. Object will be busy before results and until a short time after -- unless there is a gpu hang, in which case the results will never appear, but the object will no longer be busy. > > > + > > + eb.rsvd1 = ctx[1]; > > + eb.batch_start_offset = 64 * sizeof(*cs); > > + gem_execbuf(i915, &eb); > > + > > + *bbe = 0xa << 23; > > __sync_syncronize or are you relying on the corresponding > syscalls? It is not timing critical, so a lazy wcb flush is fine. -Chris _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2019-06-06 16:16 UTC | newest] Thread overview: 6+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2019-06-04 8:13 [igt-dev] [PATCH i-g-t 1/2] i915/gem_exec_latency: Measure the latency of context switching Chris Wilson 2019-06-04 8:13 ` [igt-dev] [PATCH i-g-t 2/2] i915/gem_exec_latency: Add another variant of waiter latency Chris Wilson 2019-06-04 10:11 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2] i915/gem_exec_latency: Measure the latency of context switching Patchwork 2019-06-04 16:26 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork 2019-06-05 15:04 ` [Intel-gfx] [PATCH i-g-t 1/2] " Mika Kuoppala 2019-06-06 16:16 ` [igt-dev] " Chris Wilson
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox