public inbox for igt-dev@lists.freedesktop.org
 help / color / mirror / Atom feed
* [igt-dev] [PATCH i-g-t 1/2] i915/gem_exec_latency: Measure the latency of context switching
@ 2019-06-04  8:13 Chris Wilson
  2019-06-04  8:13 ` [igt-dev] [PATCH i-g-t 2/2] i915/gem_exec_latency: Add another variant of waiter latency Chris Wilson
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: Chris Wilson @ 2019-06-04  8:13 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

Measure the baseline latency between contexts in order to directly
compare that with the additional cost of preemption.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/i915/gem_exec_latency.c | 230 ++++++++++++++++++++++++++++++++++
 1 file changed, 230 insertions(+)

diff --git a/tests/i915/gem_exec_latency.c b/tests/i915/gem_exec_latency.c
index e56d62780..e88fbbc6a 100644
--- a/tests/i915/gem_exec_latency.c
+++ b/tests/i915/gem_exec_latency.c
@@ -410,6 +410,86 @@ static void latency_from_ring(int fd,
 	}
 }
 
+static void execution_latency(int i915, unsigned int ring, const char *name)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4095),
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = ring | LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT,
+	};
+	const unsigned int mmio_base = 0x2000;
+	const unsigned int cs_timestamp = mmio_base + 0x358;
+	volatile uint32_t *timestamp;
+	uint32_t *cs, *result;
+
+	timestamp =
+		(volatile uint32_t *)((volatile char *)igt_global_mmio + cs_timestamp);
+
+	obj.handle = gem_create(i915, 4096);
+	obj.flags = EXEC_OBJECT_PINNED;
+	result = gem_mmap__wc(i915, obj.handle, 0, 4096, PROT_WRITE);
+
+	for (int i = 0; i < 16; i++) {
+		cs = result + 16 * i;
+		*cs++ = 0x24 << 23 | 2; /* SRM */
+		*cs++ = cs_timestamp;
+		*cs++ = 4096 - 16 * 4 + i * 4;
+		*cs++ = 0;
+		*cs++ = 0xa << 23;
+	}
+
+	cs = result + 1024 - 16;
+
+	for (int length = 2; length <= 16; length <<= 1) {
+		struct igt_mean submit, batch, total;
+		int last = length - 1;
+
+		igt_mean_init(&submit);
+		igt_mean_init(&batch);
+		igt_mean_init(&total);
+
+		igt_until_timeout(2) {
+			uint32_t now, end;
+
+			cs[last] = 0;
+
+			now = *timestamp;
+			for (int i = 0; i < length; i++) {
+				execbuf.batch_start_offset = 64 * i;
+				gem_execbuf(i915, &execbuf);
+			}
+			while (!((volatile uint32_t *)cs)[last])
+				;
+			end = *timestamp;
+
+			igt_mean_add(&submit, (cs[0] - now) * rcs_clock);
+			igt_mean_add(&batch, (cs[last] - cs[0]) * rcs_clock / last);
+			igt_mean_add(&total, (end - now) * rcs_clock);
+		}
+
+		igt_info("%sx%d Submission latency: %.2f±%.2fus\n",
+			 name, length,
+			 1e-3 * igt_mean_get(&submit),
+			 1e-3 * sqrt(igt_mean_get_variance(&submit)));
+
+		igt_info("%sx%d Inter-batch latency: %.2f±%.2fus\n",
+			 name, length,
+			 1e-3 * igt_mean_get(&batch),
+			 1e-3 * sqrt(igt_mean_get_variance(&batch)));
+
+		igt_info("%sx%d End-to-end latency: %.2f±%.2fus\n",
+			 name, length,
+			 1e-3 * igt_mean_get(&total),
+			 1e-3 * sqrt(igt_mean_get_variance(&total)));
+	}
+
+	munmap(result, 4096);
+	gem_close(i915, obj.handle);
+}
+
 static void
 __submit_spin(int fd, igt_spin_t *spin, unsigned int flags)
 {
@@ -616,6 +696,142 @@ rthog_latency_on_ring(int fd, unsigned int engine, const char *name, unsigned in
 	munmap(results, MMAP_SZ);
 }
 
+static void context_switch(int i915,
+			   unsigned int engine, const char *name,
+			   unsigned int flags)
+{
+	struct drm_i915_gem_exec_object2 obj[2];
+	struct drm_i915_gem_relocation_entry reloc[5];
+	struct drm_i915_gem_execbuffer2 eb;
+	uint32_t *cs, *bbe, *results, v;
+	unsigned int mmio_base;
+	struct igt_mean mean;
+	uint32_t ctx[2];
+
+	/* XXX i915_query()! */
+	switch (engine) {
+	case I915_EXEC_DEFAULT:
+	case I915_EXEC_RENDER:
+		mmio_base = 0x2000;
+		break;
+#if 0
+	case I915_EXEC_BSD:
+		mmio_base = 0x12000;
+		break;
+#endif
+	case I915_EXEC_BLT:
+		mmio_base = 0x22000;
+		break;
+
+	case I915_EXEC_VEBOX:
+		if (intel_gen(intel_get_drm_devid(i915)) >= 11)
+			mmio_base = 0x1d8000;
+		else
+			mmio_base = 0x1a000;
+		break;
+
+	default:
+		igt_skip("mmio base not known\n");
+	}
+
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+		ctx[i] = gem_context_create(i915);
+
+	if (flags & PREEMPT) {
+		gem_context_set_priority(i915, ctx[0], -1023);
+		gem_context_set_priority(i915, ctx[1], +1023);
+	}
+
+	memset(obj, 0, sizeof(obj));
+	obj[0].handle = gem_create(i915, 4096);
+	gem_set_caching(i915, obj[0].handle, 1);
+	results = gem_mmap__cpu(i915, obj[0].handle, 0, 4096, PROT_READ);
+	gem_set_domain(i915, obj[0].handle, I915_GEM_DOMAIN_CPU, 0);
+
+	obj[1].handle = gem_create(i915, 4096);
+	memset(reloc,0, sizeof(reloc));
+	obj[1].relocation_count = ARRAY_SIZE(reloc);
+	obj[1].relocs_ptr = to_user_pointer(reloc);
+	bbe = gem_mmap__wc(i915, obj[1].handle, 0, 4096, PROT_WRITE);
+	gem_set_domain(i915, obj[1].handle,
+		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
+
+	cs = bbe;
+	*cs++ = 0x5 << 23;
+	*cs++ = 0x24 << 23 | 2; /* SRM */
+	*cs++ = mmio_base + 0x358; /* TIMESTAMP */
+	reloc[0].target_handle = obj[0].handle;
+	reloc[0].offset = (cs - bbe) * sizeof(*cs);
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = MI_BATCH_BUFFER_START | 1;
+	reloc[1].target_handle = obj[1].handle;
+	reloc[1].offset = (cs - bbe) * sizeof(*cs);
+	*cs++ = 0;
+	*cs++ = 0;
+
+	cs = bbe + 64;
+	*cs++ = 0x24 << 23 | 2; /* SRM */
+	*cs++ = mmio_base + 0x358; /* TIMESTAMP */
+	reloc[2].target_handle = obj[0].handle;
+	reloc[2].offset = (cs - bbe) * sizeof(*cs);
+	*cs++ = reloc[2].delta = 4;
+	*cs++ = 0;
+	*cs++ = 0x29 << 23 | 2; /* LRM */
+	*cs++ = mmio_base + 0x600; /* GPR0 */
+	reloc[3].target_handle = obj[0].handle;
+	reloc[3].offset = (cs - bbe) * sizeof(*cs);
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0x24 << 23 | 2; /* SRM */
+	*cs++ = mmio_base + 0x600; /* GPR0 */
+	reloc[4].target_handle = obj[0].handle;
+	reloc[4].offset = (cs - bbe) * sizeof(*cs);
+	*cs++ = reloc[4].delta = 8;
+	*cs++ = 0;
+	*cs++ = 0xa << 23;
+
+	memset(&eb, 0, sizeof(eb));
+	eb.buffers_ptr = to_user_pointer(obj);
+	eb.buffer_count = ARRAY_SIZE(obj);
+	eb.flags = engine;
+	eb.flags |= LOCAL_I915_EXEC_NO_RELOC;
+
+	v = 0;
+	igt_mean_init(&mean);
+	igt_until_timeout(5) {
+		eb.rsvd1 = ctx[0];
+		eb.batch_start_offset = 0;
+		gem_execbuf(i915, &eb);
+
+		while (results[0] == v)
+			igt_assert(gem_bo_busy(i915, obj[1].handle));
+
+		eb.rsvd1 = ctx[1];
+		eb.batch_start_offset = 64 * sizeof(*cs);
+		gem_execbuf(i915, &eb);
+
+		*bbe = 0xa << 23;
+		gem_sync(i915, obj[1].handle);
+		*bbe = 0x5 << 23;
+
+		v = results[0];
+		igt_mean_add(&mean, (results[1] - results[2]) * rcs_clock);
+	}
+	igt_info("%s context switch latency%s: %.2f±%.2fus\n",
+		 name, flags & PREEMPT ? " (preempt)" : "",
+		 1e-3 * igt_mean_get(&mean),
+		 1e-3 * sqrt(igt_mean_get_variance(&mean)));
+	munmap(results, 4096);
+	munmap(bbe, 4096);
+
+	for (int i = 0; i < ARRAY_SIZE(obj); i++)
+		gem_close(i915, obj[i].handle);
+
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+		gem_context_destroy(i915, ctx[i]);
+}
+
 static double clockrate(int i915, int reg)
 {
 	volatile uint32_t *mmio;
@@ -722,6 +938,11 @@ igt_main
 							      e->name,
 							      0);
 
+				igt_subtest_f("%s-execution-latency", e->name)
+					execution_latency(device,
+							  e->exec_id | e->flags,
+							  e->name);
+
 				igt_subtest_f("%s-live-dispatch-queued", e->name)
 					latency_on_ring(device,
 							e->exec_id | e->flags,
@@ -741,12 +962,21 @@ igt_main
 							  e->exec_id | e->flags,
 							  e->name, CORK);
 
+				igt_subtest_f("%s-cs", e->name)
+					context_switch(device,
+						       e->exec_id | e->flags,
+						       e->name, 0);
 				igt_subtest_group {
 					igt_fixture {
 						gem_require_contexts(device);
 						igt_require(gem_scheduler_has_preemption(device));
 					}
 
+					igt_subtest_f("%s-cs-preempt", e->name)
+						context_switch(device,
+								e->exec_id | e->flags,
+								e->name, PREEMPT);
+
 					igt_subtest_f("%s-preemption", e->name)
 						latency_from_ring(device,
 								  e->exec_id | e->flags,
-- 
2.20.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [igt-dev] [PATCH i-g-t 2/2] i915/gem_exec_latency: Add another variant of waiter latency
  2019-06-04  8:13 [igt-dev] [PATCH i-g-t 1/2] i915/gem_exec_latency: Measure the latency of context switching Chris Wilson
@ 2019-06-04  8:13 ` Chris Wilson
  2019-06-04 10:11 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2] i915/gem_exec_latency: Measure the latency of context switching Patchwork
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 6+ messages in thread
From: Chris Wilson @ 2019-06-04  8:13 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/i915/gem_exec_latency.c | 81 +++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/tests/i915/gem_exec_latency.c b/tests/i915/gem_exec_latency.c
index e88fbbc6a..fd4ceb4d6 100644
--- a/tests/i915/gem_exec_latency.c
+++ b/tests/i915/gem_exec_latency.c
@@ -490,6 +490,83 @@ static void execution_latency(int i915, unsigned int ring, const char *name)
 	gem_close(i915, obj.handle);
 }
 
+static void wakeup_latency(int i915, unsigned int ring, const char *name)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4095),
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = ring | LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT,
+	};
+	const unsigned int mmio_base = 0x2000;
+	const unsigned int cs_timestamp = mmio_base + 0x358;
+	volatile uint32_t *timestamp;
+	struct igt_mean wakeup;
+	uint32_t *cs, *result;
+
+	timestamp =
+		(volatile uint32_t *)((volatile char *)igt_global_mmio + cs_timestamp);
+
+	obj.handle = gem_create(i915, 4096);
+	obj.flags = EXEC_OBJECT_PINNED;
+	result = gem_mmap__wc(i915, obj.handle, 0, 4096, PROT_WRITE);
+
+	cs = result;
+
+	*cs++ = 0x24 << 23 | 2; /* SRM */
+	*cs++ = cs_timestamp;
+	*cs++ = 4096 - 16 * 4;
+	*cs++ = 0;
+
+	*cs++ = MI_BATCH_BUFFER_START | 1;
+	*cs++ = 0;
+	*cs++ = 0;
+
+	*cs++ = 0x24 << 23 | 2; /* SRM */
+	*cs++ = cs_timestamp;
+	*cs++ = 4096 - 16 * 4 + 4;
+	*cs++ = 0;
+	*cs++ = 0xa << 23;
+
+	cs = result + 1024 - 16;
+
+	{
+		struct sched_param p = { .sched_priority = 99 };
+		sched_setscheduler(0, SCHED_FIFO | SCHED_RESET_ON_FORK, &p);
+	}
+
+	igt_mean_init(&wakeup);
+	igt_until_timeout(2) {
+		uint32_t end;
+
+		igt_fork(child, 1) {
+			result[4] = MI_BATCH_BUFFER_START | 1;
+			cs[0] = 0;
+
+			gem_execbuf(i915, &execbuf);
+
+			while (!cs[0])
+				;
+			result[4] = 0;
+			__sync_synchronize();
+		}
+		gem_sync(i915, obj.handle);
+		end = *timestamp;
+
+		igt_mean_add(&wakeup, (end - cs[1]) * rcs_clock);
+		igt_waitchildren();
+	}
+	igt_info("%s Wakeup latency: %.2f±%.2fms [%.2f, %.2f]\n", name,
+		 1e-6 * igt_mean_get(&wakeup),
+		 1e-6 * sqrt(igt_mean_get_variance(&wakeup)),
+		 1e-6 * wakeup.min, 1e-6 * wakeup.max);
+
+	munmap(result, 4096);
+	gem_close(i915, obj.handle);
+}
+
 static void
 __submit_spin(int fd, igt_spin_t *spin, unsigned int flags)
 {
@@ -942,6 +1019,10 @@ igt_main
 					execution_latency(device,
 							  e->exec_id | e->flags,
 							  e->name);
+				igt_subtest_f("%s-wakeup-latency", e->name)
+					wakeup_latency(device,
+							e->exec_id | e->flags,
+							e->name);
 
 				igt_subtest_f("%s-live-dispatch-queued", e->name)
 					latency_on_ring(device,
-- 
2.20.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2] i915/gem_exec_latency: Measure the latency of context switching
  2019-06-04  8:13 [igt-dev] [PATCH i-g-t 1/2] i915/gem_exec_latency: Measure the latency of context switching Chris Wilson
  2019-06-04  8:13 ` [igt-dev] [PATCH i-g-t 2/2] i915/gem_exec_latency: Add another variant of waiter latency Chris Wilson
@ 2019-06-04 10:11 ` Patchwork
  2019-06-04 16:26 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
  2019-06-05 15:04 ` [Intel-gfx] [PATCH i-g-t 1/2] " Mika Kuoppala
  3 siblings, 0 replies; 6+ messages in thread
From: Patchwork @ 2019-06-04 10:11 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: series starting with [i-g-t,1/2] i915/gem_exec_latency: Measure the latency of context switching
URL   : https://patchwork.freedesktop.org/series/61566/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_6183 -> IGTPW_3098
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/61566/revisions/1/mbox/

Known issues
------------

  Here are the changes found in IGTPW_3098 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@kms_frontbuffer_tracking@basic:
    - fi-icl-u3:          [PASS][1] -> [FAIL][2] ([fdo#103167])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/fi-icl-u3/igt@kms_frontbuffer_tracking@basic.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/fi-icl-u3/igt@kms_frontbuffer_tracking@basic.html

  
#### Possible fixes ####

  * igt@gem_ctx_create@basic:
    - fi-icl-u3:          [DMESG-WARN][3] ([fdo#107724]) -> [PASS][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/fi-icl-u3/igt@gem_ctx_create@basic.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/fi-icl-u3/igt@gem_ctx_create@basic.html

  * igt@i915_selftest@live_contexts:
    - fi-bdw-gvtdvm:      [DMESG-FAIL][5] ([fdo#110235]) -> [PASS][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/fi-bdw-gvtdvm/igt@i915_selftest@live_contexts.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/fi-bdw-gvtdvm/igt@i915_selftest@live_contexts.html

  * igt@vgem_basic@dmabuf-export:
    - fi-icl-dsi:         [INCOMPLETE][7] ([fdo#107713]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/fi-icl-dsi/igt@vgem_basic@dmabuf-export.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/fi-icl-dsi/igt@vgem_basic@dmabuf-export.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#110235]: https://bugs.freedesktop.org/show_bug.cgi?id=110235
  [fdo#110829]: https://bugs.freedesktop.org/show_bug.cgi?id=110829


Participating hosts (53 -> 46)
------------------------------

  Missing    (7): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-kbl-7560u fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * IGT: IGT_5033 -> IGTPW_3098

  CI_DRM_6183: aff09dc14e1d9f03f9e6c8c157d4abccf4ca2b14 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_3098: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/
  IGT_5033: 8eddc43c75c2ae4ec6e3bee70e4f815e08a928c8 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools



== Testlist changes ==

+igt@gem_exec_latency@blt-cs
+igt@gem_exec_latency@blt-cs-preempt
+igt@gem_exec_latency@blt-execution-latency
+igt@gem_exec_latency@blt-wakeup-latency
+igt@gem_exec_latency@bsd1-cs
+igt@gem_exec_latency@bsd1-cs-preempt
+igt@gem_exec_latency@bsd1-execution-latency
+igt@gem_exec_latency@bsd1-wakeup-latency
+igt@gem_exec_latency@bsd2-cs
+igt@gem_exec_latency@bsd2-cs-preempt
+igt@gem_exec_latency@bsd2-execution-latency
+igt@gem_exec_latency@bsd2-wakeup-latency
+igt@gem_exec_latency@bsd-cs
+igt@gem_exec_latency@bsd-cs-preempt
+igt@gem_exec_latency@bsd-execution-latency
+igt@gem_exec_latency@bsd-wakeup-latency
+igt@gem_exec_latency@render-cs
+igt@gem_exec_latency@render-cs-preempt
+igt@gem_exec_latency@render-execution-latency
+igt@gem_exec_latency@render-wakeup-latency
+igt@gem_exec_latency@vebox-cs
+igt@gem_exec_latency@vebox-cs-preempt
+igt@gem_exec_latency@vebox-execution-latency
+igt@gem_exec_latency@vebox-wakeup-latency

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [igt-dev] ✓ Fi.CI.IGT: success for series starting with [i-g-t,1/2] i915/gem_exec_latency: Measure the latency of context switching
  2019-06-04  8:13 [igt-dev] [PATCH i-g-t 1/2] i915/gem_exec_latency: Measure the latency of context switching Chris Wilson
  2019-06-04  8:13 ` [igt-dev] [PATCH i-g-t 2/2] i915/gem_exec_latency: Add another variant of waiter latency Chris Wilson
  2019-06-04 10:11 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2] i915/gem_exec_latency: Measure the latency of context switching Patchwork
@ 2019-06-04 16:26 ` Patchwork
  2019-06-05 15:04 ` [Intel-gfx] [PATCH i-g-t 1/2] " Mika Kuoppala
  3 siblings, 0 replies; 6+ messages in thread
From: Patchwork @ 2019-06-04 16:26 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: series starting with [i-g-t,1/2] i915/gem_exec_latency: Measure the latency of context switching
URL   : https://patchwork.freedesktop.org/series/61566/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_6183_full -> IGTPW_3098_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/61566/revisions/1/mbox/

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in IGTPW_3098_full:

### IGT changes ###

#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * {igt@gem_ctx_shared@exec-single-timeline-default}:
    - shard-iclb:         NOTRUN -> [SKIP][1]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-iclb4/igt@gem_ctx_shared@exec-single-timeline-default.html

  
Known issues
------------

  Here are the changes found in IGTPW_3098_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_tiled_swapping@non-threaded:
    - shard-apl:          [PASS][2] -> [FAIL][3] ([fdo#108686])
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-apl5/igt@gem_tiled_swapping@non-threaded.html
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-apl2/igt@gem_tiled_swapping@non-threaded.html

  * igt@kms_cursor_crc@pipe-b-cursor-128x128-onscreen:
    - shard-kbl:          [PASS][4] -> [FAIL][5] ([fdo#103232])
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-kbl2/igt@kms_cursor_crc@pipe-b-cursor-128x128-onscreen.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-kbl6/igt@kms_cursor_crc@pipe-b-cursor-128x128-onscreen.html
    - shard-apl:          [PASS][6] -> [FAIL][7] ([fdo#103232])
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-apl2/igt@kms_cursor_crc@pipe-b-cursor-128x128-onscreen.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-apl6/igt@kms_cursor_crc@pipe-b-cursor-128x128-onscreen.html

  * igt@kms_cursor_legacy@2x-long-cursor-vs-flip-atomic:
    - shard-hsw:          [PASS][8] -> [FAIL][9] ([fdo#105767])
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-hsw7/igt@kms_cursor_legacy@2x-long-cursor-vs-flip-atomic.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-hsw6/igt@kms_cursor_legacy@2x-long-cursor-vs-flip-atomic.html

  * igt@kms_flip@2x-flip-vs-suspend:
    - shard-hsw:          [PASS][10] -> [SKIP][11] ([fdo#109271]) +4 similar issues
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-hsw6/igt@kms_flip@2x-flip-vs-suspend.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-hsw1/igt@kms_flip@2x-flip-vs-suspend.html

  * igt@kms_flip@dpms-vs-vblank-race:
    - shard-glk:          [PASS][12] -> [FAIL][13] ([fdo#103060])
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-glk7/igt@kms_flip@dpms-vs-vblank-race.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-glk1/igt@kms_flip@dpms-vs-vblank-race.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-pwrite:
    - shard-iclb:         [PASS][14] -> [FAIL][15] ([fdo#103167]) +5 similar issues
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-iclb1/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-pwrite.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-iclb2/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-pwrite.html

  * igt@kms_psr@psr2_cursor_mmap_cpu:
    - shard-iclb:         [PASS][16] -> [SKIP][17] ([fdo#109441]) +1 similar issue
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-iclb2/igt@kms_psr@psr2_cursor_mmap_cpu.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-iclb4/igt@kms_psr@psr2_cursor_mmap_cpu.html

  * igt@kms_setmode@basic:
    - shard-apl:          [PASS][18] -> [FAIL][19] ([fdo#99912])
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-apl7/igt@kms_setmode@basic.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-apl8/igt@kms_setmode@basic.html

  * igt@kms_vblank@pipe-c-ts-continuation-suspend:
    - shard-apl:          [PASS][20] -> [DMESG-WARN][21] ([fdo#108566]) +5 similar issues
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-apl3/igt@kms_vblank@pipe-c-ts-continuation-suspend.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-apl4/igt@kms_vblank@pipe-c-ts-continuation-suspend.html

  
#### Possible fixes ####

  * igt@gem_tiled_swapping@non-threaded:
    - shard-hsw:          [FAIL][22] ([fdo#108686]) -> [PASS][23]
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-hsw8/igt@gem_tiled_swapping@non-threaded.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-hsw8/igt@gem_tiled_swapping@non-threaded.html
    - shard-kbl:          [DMESG-WARN][24] ([fdo#108686]) -> [PASS][25]
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-kbl6/igt@gem_tiled_swapping@non-threaded.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-kbl2/igt@gem_tiled_swapping@non-threaded.html

  * igt@kms_atomic_transition@plane-toggle-modeset-transition:
    - shard-iclb:         [INCOMPLETE][26] ([fdo#107713]) -> [PASS][27]
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-iclb7/igt@kms_atomic_transition@plane-toggle-modeset-transition.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-iclb7/igt@kms_atomic_transition@plane-toggle-modeset-transition.html

  * igt@kms_flip@2x-plain-flip:
    - shard-hsw:          [SKIP][28] ([fdo#109271]) -> [PASS][29] +18 similar issues
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-hsw1/igt@kms_flip@2x-plain-flip.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-hsw2/igt@kms_flip@2x-plain-flip.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-pri-indfb-multidraw:
    - shard-iclb:         [FAIL][30] ([fdo#103167]) -> [PASS][31] +1 similar issue
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-iclb2/igt@kms_frontbuffer_tracking@fbcpsr-1p-pri-indfb-multidraw.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-iclb5/igt@kms_frontbuffer_tracking@fbcpsr-1p-pri-indfb-multidraw.html

  * igt@kms_psr@psr2_primary_mmap_gtt:
    - shard-iclb:         [SKIP][32] ([fdo#109441]) -> [PASS][33] +2 similar issues
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-iclb6/igt@kms_psr@psr2_primary_mmap_gtt.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-iclb2/igt@kms_psr@psr2_primary_mmap_gtt.html

  * igt@kms_sysfs_edid_timing:
    - shard-hsw:          [FAIL][34] ([fdo#100047]) -> [PASS][35]
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-hsw1/igt@kms_sysfs_edid_timing.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-hsw2/igt@kms_sysfs_edid_timing.html

  * igt@perf_pmu@rc6:
    - shard-kbl:          [SKIP][36] ([fdo#109271]) -> [PASS][37]
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-kbl2/igt@perf_pmu@rc6.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-kbl1/igt@perf_pmu@rc6.html

  * igt@perf_pmu@rc6-runtime-pm-long:
    - shard-kbl:          [FAIL][38] ([fdo#105010]) -> [PASS][39]
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-kbl6/igt@perf_pmu@rc6-runtime-pm-long.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-kbl1/igt@perf_pmu@rc6-runtime-pm-long.html

  
#### Warnings ####

  * igt@gem_mmap_gtt@forked-big-copy-odd:
    - shard-iclb:         [INCOMPLETE][40] ([fdo#107713] / [fdo#109100]) -> [TIMEOUT][41] ([fdo#109673])
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6183/shard-iclb4/igt@gem_mmap_gtt@forked-big-copy-odd.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/shard-iclb8/igt@gem_mmap_gtt@forked-big-copy-odd.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#100047]: https://bugs.freedesktop.org/show_bug.cgi?id=100047
  [fdo#103060]: https://bugs.freedesktop.org/show_bug.cgi?id=103060
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#105010]: https://bugs.freedesktop.org/show_bug.cgi?id=105010
  [fdo#105767]: https://bugs.freedesktop.org/show_bug.cgi?id=105767
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#108566]: https://bugs.freedesktop.org/show_bug.cgi?id=108566
  [fdo#108686]: https://bugs.freedesktop.org/show_bug.cgi?id=108686
  [fdo#109100]: https://bugs.freedesktop.org/show_bug.cgi?id=109100
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#109673]: https://bugs.freedesktop.org/show_bug.cgi?id=109673
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912


Participating hosts (10 -> 6)
------------------------------

  Missing    (4): pig-skl-6260u shard-skl pig-hsw-4770r pig-glk-j5005 


Build changes
-------------

  * IGT: IGT_5033 -> IGTPW_3098
  * Piglit: piglit_4509 -> None

  CI_DRM_6183: aff09dc14e1d9f03f9e6c8c157d4abccf4ca2b14 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_3098: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/
  IGT_5033: 8eddc43c75c2ae4ec6e3bee70e4f815e08a928c8 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3098/
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Intel-gfx] [PATCH i-g-t 1/2] i915/gem_exec_latency: Measure the latency of context switching
  2019-06-04  8:13 [igt-dev] [PATCH i-g-t 1/2] i915/gem_exec_latency: Measure the latency of context switching Chris Wilson
                   ` (2 preceding siblings ...)
  2019-06-04 16:26 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
@ 2019-06-05 15:04 ` Mika Kuoppala
  2019-06-06 16:16   ` [igt-dev] " Chris Wilson
  3 siblings, 1 reply; 6+ messages in thread
From: Mika Kuoppala @ 2019-06-05 15:04 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Measure the baseline latency between contexts in order to directly
> compare that with the additional cost of preemption.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  tests/i915/gem_exec_latency.c | 230 ++++++++++++++++++++++++++++++++++
>  1 file changed, 230 insertions(+)
>
> diff --git a/tests/i915/gem_exec_latency.c b/tests/i915/gem_exec_latency.c
> index e56d62780..e88fbbc6a 100644
> --- a/tests/i915/gem_exec_latency.c
> +++ b/tests/i915/gem_exec_latency.c
> @@ -410,6 +410,86 @@ static void latency_from_ring(int fd,
>  	}
>  }
>  
> +static void execution_latency(int i915, unsigned int ring, const char *name)
> +{
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = gem_create(i915, 4095),
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +		.flags = ring | LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT,
> +	};
> +	const unsigned int mmio_base = 0x2000;
> +	const unsigned int cs_timestamp = mmio_base + 0x358;
> +	volatile uint32_t *timestamp;
> +	uint32_t *cs, *result;
> +
> +	timestamp =
> +		(volatile uint32_t *)((volatile char *)igt_global_mmio + cs_timestamp);
> +
> +	obj.handle = gem_create(i915, 4096);
> +	obj.flags = EXEC_OBJECT_PINNED;
> +	result = gem_mmap__wc(i915, obj.handle, 0, 4096, PROT_WRITE);
> +
> +	for (int i = 0; i < 16; i++) {
> +		cs = result + 16 * i;
> +		*cs++ = 0x24 << 23 | 2; /* SRM */
> +		*cs++ = cs_timestamp;
> +		*cs++ = 4096 - 16 * 4 + i * 4;
> +		*cs++ = 0;
> +		*cs++ = 0xa << 23;

Why not MI_BATCH_BUFFER_END? To emphasize that we have
multiple batches inside a bo?

> +	}
> +
> +	cs = result + 1024 - 16;
> +
> +	for (int length = 2; length <= 16; length <<= 1) {
> +		struct igt_mean submit, batch, total;
> +		int last = length - 1;
> +
> +		igt_mean_init(&submit);
> +		igt_mean_init(&batch);
> +		igt_mean_init(&total);
> +
> +		igt_until_timeout(2) {
> +			uint32_t now, end;
> +
> +			cs[last] = 0;
> +
> +			now = *timestamp;
> +			for (int i = 0; i < length; i++) {
> +				execbuf.batch_start_offset = 64 * i;
> +				gem_execbuf(i915, &execbuf);
> +			}
> +			while (!((volatile uint32_t *)cs)[last])
> +				;
> +			end = *timestamp;
> +
> +			igt_mean_add(&submit, (cs[0] - now) * rcs_clock);
> +			igt_mean_add(&batch, (cs[last] - cs[0]) * rcs_clock / last);

Just curious of what do you use of inter batch latency?

Oh and do we need to to take the rcs_clock resolution into account
on result calculation. Prolly not as it seems to be ticking fast enough
for 0.1us accuracy.

> +			igt_mean_add(&total, (end - now) * rcs_clock);
> +		}
> +
> +		igt_info("%sx%d Submission latency: %.2f±%.2fus\n",
> +			 name, length,
> +			 1e-3 * igt_mean_get(&submit),
> +			 1e-3 * sqrt(igt_mean_get_variance(&submit)));
> +
> +		igt_info("%sx%d Inter-batch latency: %.2f±%.2fus\n",
> +			 name, length,
> +			 1e-3 * igt_mean_get(&batch),
> +			 1e-3 * sqrt(igt_mean_get_variance(&batch)));
> +
> +		igt_info("%sx%d End-to-end latency: %.2f±%.2fus\n",
> +			 name, length,
> +			 1e-3 * igt_mean_get(&total),
> +			 1e-3 * sqrt(igt_mean_get_variance(&total)));
> +	}
> +
> +	munmap(result, 4096);
> +	gem_close(i915, obj.handle);
> +}
> +
>  static void
>  __submit_spin(int fd, igt_spin_t *spin, unsigned int flags)
>  {
> @@ -616,6 +696,142 @@ rthog_latency_on_ring(int fd, unsigned int engine, const char *name, unsigned in
>  	munmap(results, MMAP_SZ);
>  }
>  
> +static void context_switch(int i915,
> +			   unsigned int engine, const char *name,
> +			   unsigned int flags)
> +{
> +	struct drm_i915_gem_exec_object2 obj[2];
> +	struct drm_i915_gem_relocation_entry reloc[5];
> +	struct drm_i915_gem_execbuffer2 eb;
> +	uint32_t *cs, *bbe, *results, v;
> +	unsigned int mmio_base;
> +	struct igt_mean mean;
> +	uint32_t ctx[2];
> +
> +	/* XXX i915_query()! */
> +	switch (engine) {
> +	case I915_EXEC_DEFAULT:
> +	case I915_EXEC_RENDER:
> +		mmio_base = 0x2000;
> +		break;
> +#if 0
> +	case I915_EXEC_BSD:
> +		mmio_base = 0x12000;
> +		break;
> +#endif

It seems to be there according to bspec. Timestamps don't
work with it?

> +	case I915_EXEC_BLT:
> +		mmio_base = 0x22000;
> +		break;
> +
> +	case I915_EXEC_VEBOX:
> +		if (intel_gen(intel_get_drm_devid(i915)) >= 11)
> +			mmio_base = 0x1d8000;
> +		else
> +			mmio_base = 0x1a000;
> +		break;
> +
> +	default:
> +		igt_skip("mmio base not known\n");
> +	}
> +
> +	for (int i = 0; i < ARRAY_SIZE(ctx); i++)
> +		ctx[i] = gem_context_create(i915);
> +
> +	if (flags & PREEMPT) {
> +		gem_context_set_priority(i915, ctx[0], -1023);
> +		gem_context_set_priority(i915, ctx[1], +1023);
> +	}
> +
> +	memset(obj, 0, sizeof(obj));
> +	obj[0].handle = gem_create(i915, 4096);
> +	gem_set_caching(i915, obj[0].handle, 1);
> +	results = gem_mmap__cpu(i915, obj[0].handle, 0, 4096, PROT_READ);
> +	gem_set_domain(i915, obj[0].handle, I915_GEM_DOMAIN_CPU, 0);
> +
> +	obj[1].handle = gem_create(i915, 4096);
> +	memset(reloc,0, sizeof(reloc));
> +	obj[1].relocation_count = ARRAY_SIZE(reloc);
> +	obj[1].relocs_ptr = to_user_pointer(reloc);
> +	bbe = gem_mmap__wc(i915, obj[1].handle, 0, 4096, PROT_WRITE);
> +	gem_set_domain(i915, obj[1].handle,
> +		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
> +
> +	cs = bbe;
> +	*cs++ = 0x5 << 23;

/* ARB_CHK */
or even better MI_ARB_CHK;

> +	*cs++ = 0x24 << 23 | 2; /* SRM */
> +	*cs++ = mmio_base + 0x358; /* TIMESTAMP */
> +	reloc[0].target_handle = obj[0].handle;
> +	reloc[0].offset = (cs - bbe) * sizeof(*cs);
> +	*cs++ = 0;
> +	*cs++ = 0;
> +	*cs++ = MI_BATCH_BUFFER_START | 1;
> +	reloc[1].target_handle = obj[1].handle;
> +	reloc[1].offset = (cs - bbe) * sizeof(*cs);
> +	*cs++ = 0;
> +	*cs++ = 0;
> +
> +	cs = bbe + 64;
> +	*cs++ = 0x24 << 23 | 2; /* SRM */
> +	*cs++ = mmio_base + 0x358; /* TIMESTAMP */
> +	reloc[2].target_handle = obj[0].handle;
> +	reloc[2].offset = (cs - bbe) * sizeof(*cs);
> +	*cs++ = reloc[2].delta = 4;
> +	*cs++ = 0;
> +	*cs++ = 0x29 << 23 | 2; /* LRM */
> +	*cs++ = mmio_base + 0x600; /* GPR0 */
> +	reloc[3].target_handle = obj[0].handle;
> +	reloc[3].offset = (cs - bbe) * sizeof(*cs);
> +	*cs++ = 0;
> +	*cs++ = 0;
> +	*cs++ = 0x24 << 23 | 2; /* SRM */
> +	*cs++ = mmio_base + 0x600; /* GPR0 */
> +	reloc[4].target_handle = obj[0].handle;
> +	reloc[4].offset = (cs - bbe) * sizeof(*cs);
> +	*cs++ = reloc[4].delta = 8;
> +	*cs++ = 0;
> +	*cs++ = 0xa << 23;
> +
> +	memset(&eb, 0, sizeof(eb));
> +	eb.buffers_ptr = to_user_pointer(obj);
> +	eb.buffer_count = ARRAY_SIZE(obj);
> +	eb.flags = engine;
> +	eb.flags |= LOCAL_I915_EXEC_NO_RELOC;
> +
> +	v = 0;
> +	igt_mean_init(&mean);
> +	igt_until_timeout(5) {
> +		eb.rsvd1 = ctx[0];
> +		eb.batch_start_offset = 0;
> +		gem_execbuf(i915, &eb);
> +
> +		while (results[0] == v)
> +			igt_assert(gem_bo_busy(i915, obj[1].handle));

Is there a race there? Object will be busy short time
after the result appears.

> +
> +		eb.rsvd1 = ctx[1];
> +		eb.batch_start_offset = 64 * sizeof(*cs);
> +		gem_execbuf(i915, &eb);
> +
> +		*bbe = 0xa << 23;

__sync_syncronize or are you relying on the corresponding
syscalls?

-Mika

> +		gem_sync(i915, obj[1].handle);
> +		*bbe = 0x5 << 23;
> +
> +		v = results[0];
> +		igt_mean_add(&mean, (results[1] - results[2]) * rcs_clock);
> +	}
> +	igt_info("%s context switch latency%s: %.2f±%.2fus\n",
> +		 name, flags & PREEMPT ? " (preempt)" : "",
> +		 1e-3 * igt_mean_get(&mean),
> +		 1e-3 * sqrt(igt_mean_get_variance(&mean)));
> +	munmap(results, 4096);
> +	munmap(bbe, 4096);
> +
> +	for (int i = 0; i < ARRAY_SIZE(obj); i++)
> +		gem_close(i915, obj[i].handle);
> +
> +	for (int i = 0; i < ARRAY_SIZE(ctx); i++)
> +		gem_context_destroy(i915, ctx[i]);
> +}
> +
>  static double clockrate(int i915, int reg)
>  {
>  	volatile uint32_t *mmio;
> @@ -722,6 +938,11 @@ igt_main
>  							      e->name,
>  							      0);
>  
> +				igt_subtest_f("%s-execution-latency", e->name)
> +					execution_latency(device,
> +							  e->exec_id | e->flags,
> +							  e->name);
> +
>  				igt_subtest_f("%s-live-dispatch-queued", e->name)
>  					latency_on_ring(device,
>  							e->exec_id | e->flags,
> @@ -741,12 +962,21 @@ igt_main
>  							  e->exec_id | e->flags,
>  							  e->name, CORK);
>  
> +				igt_subtest_f("%s-cs", e->name)
> +					context_switch(device,
> +						       e->exec_id | e->flags,
> +						       e->name, 0);
>  				igt_subtest_group {
>  					igt_fixture {
>  						gem_require_contexts(device);
>  						igt_require(gem_scheduler_has_preemption(device));
>  					}
>  
> +					igt_subtest_f("%s-cs-preempt", e->name)
> +						context_switch(device,
> +								e->exec_id | e->flags,
> +								e->name, PREEMPT);
> +
>  					igt_subtest_f("%s-preemption", e->name)
>  						latency_from_ring(device,
>  								  e->exec_id | e->flags,
> -- 
> 2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 1/2] i915/gem_exec_latency: Measure the latency of context switching
  2019-06-05 15:04 ` [Intel-gfx] [PATCH i-g-t 1/2] " Mika Kuoppala
@ 2019-06-06 16:16   ` Chris Wilson
  0 siblings, 0 replies; 6+ messages in thread
From: Chris Wilson @ 2019-06-06 16:16 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx; +Cc: igt-dev

Quoting Mika Kuoppala (2019-06-05 16:04:09)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Measure the baseline latency between contexts in order to directly
> > compare that with the additional cost of preemption.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  tests/i915/gem_exec_latency.c | 230 ++++++++++++++++++++++++++++++++++
> >  1 file changed, 230 insertions(+)
> >
> > diff --git a/tests/i915/gem_exec_latency.c b/tests/i915/gem_exec_latency.c
> > index e56d62780..e88fbbc6a 100644
> > --- a/tests/i915/gem_exec_latency.c
> > +++ b/tests/i915/gem_exec_latency.c
> > @@ -410,6 +410,86 @@ static void latency_from_ring(int fd,
> >       }
> >  }
> >  
> > +static void execution_latency(int i915, unsigned int ring, const char *name)
> > +{
> > +     struct drm_i915_gem_exec_object2 obj = {
> > +             .handle = gem_create(i915, 4095),
> > +     };
> > +     struct drm_i915_gem_execbuffer2 execbuf = {
> > +             .buffers_ptr = to_user_pointer(&obj),
> > +             .buffer_count = 1,
> > +             .flags = ring | LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT,
> > +     };
> > +     const unsigned int mmio_base = 0x2000;
> > +     const unsigned int cs_timestamp = mmio_base + 0x358;
> > +     volatile uint32_t *timestamp;
> > +     uint32_t *cs, *result;
> > +
> > +     timestamp =
> > +             (volatile uint32_t *)((volatile char *)igt_global_mmio + cs_timestamp);
> > +
> > +     obj.handle = gem_create(i915, 4096);
> > +     obj.flags = EXEC_OBJECT_PINNED;
> > +     result = gem_mmap__wc(i915, obj.handle, 0, 4096, PROT_WRITE);
> > +
> > +     for (int i = 0; i < 16; i++) {
> > +             cs = result + 16 * i;
> > +             *cs++ = 0x24 << 23 | 2; /* SRM */
> > +             *cs++ = cs_timestamp;
> > +             *cs++ = 4096 - 16 * 4 + i * 4;
> > +             *cs++ = 0;
> > +             *cs++ = 0xa << 23;
> 
> Why not MI_BATCH_BUFFER_END? To emphasize that we have
> multiple batches inside a bo?

I wrote MI_BATCH_BUFFER_END... Oh, I see, no, I was just using the
values for no other reason than habit.

> > +     }
> > +
> > +     cs = result + 1024 - 16;
> > +
> > +     for (int length = 2; length <= 16; length <<= 1) {
> > +             struct igt_mean submit, batch, total;
> > +             int last = length - 1;
> > +
> > +             igt_mean_init(&submit);
> > +             igt_mean_init(&batch);
> > +             igt_mean_init(&total);
> > +
> > +             igt_until_timeout(2) {
> > +                     uint32_t now, end;
> > +
> > +                     cs[last] = 0;
> > +
> > +                     now = *timestamp;
> > +                     for (int i = 0; i < length; i++) {
> > +                             execbuf.batch_start_offset = 64 * i;
> > +                             gem_execbuf(i915, &execbuf);
> > +                     }
> > +                     while (!((volatile uint32_t *)cs)[last])
> > +                             ;
> > +                     end = *timestamp;
> > +
> > +                     igt_mean_add(&submit, (cs[0] - now) * rcs_clock);
> > +                     igt_mean_add(&batch, (cs[last] - cs[0]) * rcs_clock / last);
> 
> Just curious of what do you use of inter batch latency?

EPARSE.

> Oh and do we need to to take the rcs_clock resolution into account
> on result calculation. Prolly not as it seems to be ticking fast enough
> for 0.1us accuracy.

It just provides the lower granularity to results. It doesn't alter the
calculations or presentation, just that quantum beneath which we cannot
measure.

Considering that the CS cannot go faster than its clock, it doesn't
affect anything.

> > +                     igt_mean_add(&total, (end - now) * rcs_clock);
> > +             }
> > +
> > +             igt_info("%sx%d Submission latency: %.2f±%.2fus\n",
> > +                      name, length,
> > +                      1e-3 * igt_mean_get(&submit),
> > +                      1e-3 * sqrt(igt_mean_get_variance(&submit)));
> > +
> > +             igt_info("%sx%d Inter-batch latency: %.2f±%.2fus\n",
> > +                      name, length,
> > +                      1e-3 * igt_mean_get(&batch),
> > +                      1e-3 * sqrt(igt_mean_get_variance(&batch)));
> > +
> > +             igt_info("%sx%d End-to-end latency: %.2f±%.2fus\n",
> > +                      name, length,
> > +                      1e-3 * igt_mean_get(&total),
> > +                      1e-3 * sqrt(igt_mean_get_variance(&total)));
> > +     }
> > +
> > +     munmap(result, 4096);
> > +     gem_close(i915, obj.handle);
> > +}
> > +
> >  static void
> >  __submit_spin(int fd, igt_spin_t *spin, unsigned int flags)
> >  {
> > @@ -616,6 +696,142 @@ rthog_latency_on_ring(int fd, unsigned int engine, const char *name, unsigned in
> >       munmap(results, MMAP_SZ);
> >  }
> >  
> > +static void context_switch(int i915,
> > +                        unsigned int engine, const char *name,
> > +                        unsigned int flags)
> > +{
> > +     struct drm_i915_gem_exec_object2 obj[2];
> > +     struct drm_i915_gem_relocation_entry reloc[5];
> > +     struct drm_i915_gem_execbuffer2 eb;
> > +     uint32_t *cs, *bbe, *results, v;
> > +     unsigned int mmio_base;
> > +     struct igt_mean mean;
> > +     uint32_t ctx[2];
> > +
> > +     /* XXX i915_query()! */
> > +     switch (engine) {
> > +     case I915_EXEC_DEFAULT:
> > +     case I915_EXEC_RENDER:
> > +             mmio_base = 0x2000;
> > +             break;
> > +#if 0
> > +     case I915_EXEC_BSD:
> > +             mmio_base = 0x12000;
> > +             break;
> > +#endif
> 
> It seems to be there according to bspec. Timestamps don't
> work with it?

Engine assignment is random. Old uapi is a mess.

> 
> > +     case I915_EXEC_BLT:
> > +             mmio_base = 0x22000;
> > +             break;
> > +
> > +     case I915_EXEC_VEBOX:
> > +             if (intel_gen(intel_get_drm_devid(i915)) >= 11)
> > +                     mmio_base = 0x1d8000;
> > +             else
> > +                     mmio_base = 0x1a000;
> > +             break;
> > +
> > +     default:
> > +             igt_skip("mmio base not known\n");
> > +     }
> > +
> > +     for (int i = 0; i < ARRAY_SIZE(ctx); i++)
> > +             ctx[i] = gem_context_create(i915);
> > +
> > +     if (flags & PREEMPT) {
> > +             gem_context_set_priority(i915, ctx[0], -1023);
> > +             gem_context_set_priority(i915, ctx[1], +1023);
> > +     }
> > +
> > +     memset(obj, 0, sizeof(obj));
> > +     obj[0].handle = gem_create(i915, 4096);
> > +     gem_set_caching(i915, obj[0].handle, 1);
> > +     results = gem_mmap__cpu(i915, obj[0].handle, 0, 4096, PROT_READ);
> > +     gem_set_domain(i915, obj[0].handle, I915_GEM_DOMAIN_CPU, 0);
> > +
> > +     obj[1].handle = gem_create(i915, 4096);
> > +     memset(reloc,0, sizeof(reloc));
> > +     obj[1].relocation_count = ARRAY_SIZE(reloc);
> > +     obj[1].relocs_ptr = to_user_pointer(reloc);
> > +     bbe = gem_mmap__wc(i915, obj[1].handle, 0, 4096, PROT_WRITE);
> > +     gem_set_domain(i915, obj[1].handle,
> > +                    I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
> > +
> > +     cs = bbe;
> > +     *cs++ = 0x5 << 23;
> 
> /* ARB_CHK */
> or even better MI_ARB_CHK;

Who doesn't know these commands?

> 
> > +     *cs++ = 0x24 << 23 | 2; /* SRM */
> > +     *cs++ = mmio_base + 0x358; /* TIMESTAMP */
> > +     reloc[0].target_handle = obj[0].handle;
> > +     reloc[0].offset = (cs - bbe) * sizeof(*cs);
> > +     *cs++ = 0;
> > +     *cs++ = 0;
> > +     *cs++ = MI_BATCH_BUFFER_START | 1;
> > +     reloc[1].target_handle = obj[1].handle;
> > +     reloc[1].offset = (cs - bbe) * sizeof(*cs);
> > +     *cs++ = 0;
> > +     *cs++ = 0;
> > +
> > +     cs = bbe + 64;
> > +     *cs++ = 0x24 << 23 | 2; /* SRM */
> > +     *cs++ = mmio_base + 0x358; /* TIMESTAMP */
> > +     reloc[2].target_handle = obj[0].handle;
> > +     reloc[2].offset = (cs - bbe) * sizeof(*cs);
> > +     *cs++ = reloc[2].delta = 4;
> > +     *cs++ = 0;
> > +     *cs++ = 0x29 << 23 | 2; /* LRM */
> > +     *cs++ = mmio_base + 0x600; /* GPR0 */
> > +     reloc[3].target_handle = obj[0].handle;
> > +     reloc[3].offset = (cs - bbe) * sizeof(*cs);
> > +     *cs++ = 0;
> > +     *cs++ = 0;
> > +     *cs++ = 0x24 << 23 | 2; /* SRM */
> > +     *cs++ = mmio_base + 0x600; /* GPR0 */
> > +     reloc[4].target_handle = obj[0].handle;
> > +     reloc[4].offset = (cs - bbe) * sizeof(*cs);
> > +     *cs++ = reloc[4].delta = 8;
> > +     *cs++ = 0;
> > +     *cs++ = 0xa << 23;
> > +
> > +     memset(&eb, 0, sizeof(eb));
> > +     eb.buffers_ptr = to_user_pointer(obj);
> > +     eb.buffer_count = ARRAY_SIZE(obj);
> > +     eb.flags = engine;
> > +     eb.flags |= LOCAL_I915_EXEC_NO_RELOC;
> > +
> > +     v = 0;
> > +     igt_mean_init(&mean);
> > +     igt_until_timeout(5) {
> > +             eb.rsvd1 = ctx[0];
> > +             eb.batch_start_offset = 0;
> > +             gem_execbuf(i915, &eb);
> > +
> > +             while (results[0] == v)
> > +                     igt_assert(gem_bo_busy(i915, obj[1].handle));
> 
> Is there a race there? Object will be busy short time
> after the result appears.

No. Object will be busy before results and until a short time after --
unless there is a gpu hang, in which case the results will never appear,
but the object will no longer be busy.

> 
> > +
> > +             eb.rsvd1 = ctx[1];
> > +             eb.batch_start_offset = 64 * sizeof(*cs);
> > +             gem_execbuf(i915, &eb);
> > +
> > +             *bbe = 0xa << 23;
> 
> __sync_syncronize or are you relying on the corresponding
> syscalls?

It is not timing critical, so a lazy wcb flush is fine.
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-06-06 16:16 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-06-04  8:13 [igt-dev] [PATCH i-g-t 1/2] i915/gem_exec_latency: Measure the latency of context switching Chris Wilson
2019-06-04  8:13 ` [igt-dev] [PATCH i-g-t 2/2] i915/gem_exec_latency: Add another variant of waiter latency Chris Wilson
2019-06-04 10:11 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2] i915/gem_exec_latency: Measure the latency of context switching Patchwork
2019-06-04 16:26 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
2019-06-05 15:04 ` [Intel-gfx] [PATCH i-g-t 1/2] " Mika Kuoppala
2019-06-06 16:16   ` [igt-dev] " Chris Wilson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox