intel-gfx.lists.freedesktop.org archive mirror
* [PATCH i-g-t 1/3] igt/gem_sync: Exercise sync after context switch
@ 2018-08-10 11:01 Chris Wilson
  2018-08-10 11:01 ` [PATCH i-g-t 2/3] gem_sync: Measure wakeup latency while also scheduling the next batch Chris Wilson
                   ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: Chris Wilson @ 2018-08-10 11:01 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

This exercises a special case that may be of interest: waiting for a
context that may be preempted in order to reduce the wait.
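
The core of the measured loop, distilled from the diff below (a sketch
only; the two contexts and their store-dword batches are set up as in
switch_ring()):

  double this, elapsed = 0;

  /* Queue a batch on each of two independent contexts, then time only
   * the wait on the second. If the driver reorders the equivalent
   * contexts to satisfy the wait earlier, the measured time shrinks.
   */
  gem_execbuf(fd, &contexts[0].execbuf);
  gem_execbuf(fd, &contexts[1].execbuf);

  this = gettime();
  gem_sync(fd, contexts[1].object[1].handle); /* the timed wait */
  elapsed += gettime() - this;

  gem_sync(fd, contexts[0].object[1].handle); /* drain before repeating */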

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/gem_sync.c | 146 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 146 insertions(+)

diff --git a/tests/gem_sync.c b/tests/gem_sync.c
index 493ae61df..495ca3b53 100644
--- a/tests/gem_sync.c
+++ b/tests/gem_sync.c
@@ -409,6 +409,144 @@ store_ring(int fd, unsigned ring, int num_children, int timeout)
 	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
 }
 
+static void
+switch_ring(int fd, unsigned ring, int num_children, int timeout)
+{
+	const int gen = intel_gen(intel_get_drm_devid(fd));
+	unsigned engines[16];
+	const char *names[16];
+	int num_engines = 0;
+
+	gem_require_contexts(fd);
+
+	if (ring == ALL_ENGINES) {
+		for_each_physical_engine(fd, ring) {
+			if (!gem_can_store_dword(fd, ring))
+				continue;
+
+			names[num_engines] = e__->name;
+			engines[num_engines++] = ring;
+			if (num_engines == ARRAY_SIZE(engines))
+				break;
+		}
+
+		num_children *= num_engines;
+	} else {
+		gem_require_ring(fd, ring);
+		igt_require(gem_can_store_dword(fd, ring));
+		names[num_engines] = NULL;
+		engines[num_engines++] = ring;
+	}
+
+	intel_detect_and_clear_missed_interrupts(fd);
+	igt_fork(child, num_children) {
+		struct context {
+			struct drm_i915_gem_exec_object2 object[2];
+			struct drm_i915_gem_relocation_entry reloc[1024];
+			struct drm_i915_gem_execbuffer2 execbuf;
+		} contexts[2];
+		double start, elapsed;
+		unsigned long cycles;
+
+		for (int i = 0; i < ARRAY_SIZE(contexts); i++) {
+			const uint32_t bbe = MI_BATCH_BUFFER_END;
+			const uint32_t sz = 32 << 10;
+			struct context *c = &contexts[i];
+			uint32_t *batch, *b;
+
+			memset(&c->execbuf, 0, sizeof(c->execbuf));
+			c->execbuf.buffers_ptr = to_user_pointer(c->object);
+			c->execbuf.flags = engines[child % num_engines];
+			c->execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+			c->execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
+			if (gen < 6)
+				c->execbuf.flags |= I915_EXEC_SECURE;
+			c->execbuf.rsvd1 = gem_context_create(fd);
+
+			memset(c->object, 0, sizeof(c->object));
+			c->object[0].handle = gem_create(fd, 4096);
+			gem_write(fd, c->object[0].handle, 0, &bbe, sizeof(bbe));
+			c->execbuf.buffer_count = 1;
+			gem_execbuf(fd, &c->execbuf);
+
+			c->object[0].flags |= EXEC_OBJECT_WRITE;
+			c->object[1].handle = gem_create(fd, sz);
+
+			c->object[1].relocs_ptr = to_user_pointer(c->reloc);
+			c->object[1].relocation_count = 1024;
+
+			batch = gem_mmap__cpu(fd, c->object[1].handle, 0, sz,
+					PROT_WRITE | PROT_READ);
+			gem_set_domain(fd, c->object[1].handle,
+					I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+
+			memset(c->reloc, 0, sizeof(c->reloc));
+			b = batch;
+			for (int r = 0; r < 1024; r++) {
+				uint64_t offset;
+
+				c->reloc[r].presumed_offset = c->object[0].offset;
+				c->reloc[r].offset = (b - batch + 1) * sizeof(*batch);
+				c->reloc[r].delta = r * sizeof(uint32_t);
+				c->reloc[r].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+				c->reloc[r].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+
+				offset = c->object[0].offset + c->reloc[r].delta;
+				*b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+				if (gen >= 8) {
+					*b++ = offset;
+					*b++ = offset >> 32;
+				} else if (gen >= 4) {
+					*b++ = 0;
+					*b++ = offset;
+					c->reloc[r].offset += sizeof(*batch);
+				} else {
+					b[-1] -= 1;
+					*b++ = offset;
+				}
+				*b++ = r;
+				*b++ = 0x5 << 23;
+			}
+			*b++ = MI_BATCH_BUFFER_END;
+			igt_assert((b - batch)*sizeof(uint32_t) < sz);
+			munmap(batch, sz);
+			c->execbuf.buffer_count = 2;
+			gem_execbuf(fd, &c->execbuf);
+			gem_sync(fd, c->object[1].handle);
+		}
+
+		cycles = 0;
+		elapsed = 0;
+		start = gettime();
+		do {
+			do {
+				double this;
+
+				gem_execbuf(fd, &contexts[0].execbuf);
+				gem_execbuf(fd, &contexts[1].execbuf);
+
+				this = gettime();
+				gem_sync(fd, contexts[1].object[1].handle);
+				elapsed += gettime() - this;
+
+				gem_sync(fd, contexts[0].object[1].handle);
+			} while (++cycles & 1023);
+		} while ((gettime() - start) < timeout);
+		igt_info("%s%sompleted %ld cycles: %.3f us\n",
+			 names[child % num_engines] ?: "",
+			 names[child % num_engines] ? " c" : "C",
+			 cycles, elapsed*1e6/cycles);
+
+		for (int i = 0; i < ARRAY_SIZE(contexts); i++) {
+			gem_close(fd, contexts[i].object[1].handle);
+			gem_close(fd, contexts[i].object[0].handle);
+			gem_context_destroy(fd, contexts[i].execbuf.rsvd1);
+		}
+	}
+	igt_waitchildren_timeout(timeout+10, NULL);
+	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
 static void xchg(void *array, unsigned i, unsigned j)
 {
 	uint32_t *u32 = array;
@@ -884,6 +1022,10 @@ igt_main
 			wakeup_ring(fd, e->exec_id | e->flags, 150, 2);
 		igt_subtest_f("store-%s", e->name)
 			store_ring(fd, e->exec_id | e->flags, 1, 150);
+		igt_subtest_f("switch-%s", e->name)
+			switch_ring(fd, e->exec_id | e->flags, 1, 150);
+		igt_subtest_f("forked-switch-%s", e->name)
+			switch_ring(fd, e->exec_id | e->flags, ncpus, 150);
 		igt_subtest_f("many-%s", e->name)
 			store_many(fd, e->exec_id | e->flags, 150);
 		igt_subtest_f("forked-%s", e->name)
@@ -898,6 +1040,10 @@ igt_main
 		store_ring(fd, ALL_ENGINES, 1, 5);
 	igt_subtest("basic-many-each")
 		store_many(fd, ALL_ENGINES, 5);
+	igt_subtest("switch-each")
+		switch_ring(fd, ALL_ENGINES, 1, 150);
+	igt_subtest("forked-switch-each")
+		switch_ring(fd, ALL_ENGINES, ncpus, 150);
 	igt_subtest("forked-each")
 		sync_ring(fd, ALL_ENGINES, ncpus, 150);
 	igt_subtest("forked-store-each")
-- 
2.18.0

* [PATCH i-g-t 2/3] gem_sync: Measure wakeup latency while also scheduling the next batch
  2018-08-10 11:01 [PATCH i-g-t 1/3] igt/gem_sync: Exercise sync after context switch Chris Wilson
@ 2018-08-10 11:01 ` Chris Wilson
  2018-08-17 17:10   ` [igt-dev] " Antonio Argenziano
  2018-08-10 11:01 ` [PATCH i-g-t 3/3] igt/gem_sync: Exercising waiting while keeping the GPU busy Chris Wilson
  2018-08-14 23:50 ` [PATCH i-g-t 1/3] igt/gem_sync: Exercise sync after context switch Antonio Argenziano
  2 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2018-08-10 11:01 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

More variants on stress waits, serving the dual purpose of investigating
different aspects of the latency (this time while also servicing
execlists interrupts) and checking that we never miss the wakeup.
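
The measurement core, distilled (a sketch only; spinner setup and the
warmup pass that computes the baseline are as in the diff below):

  double this, elapsed = 0;

  /* Busy-wait until the first spinner is actually executing. */
  while (!READ_ONCE(*spin[0]->running))
          ;

  /* Queue the extra batches plus a second spinner so that the engine
   * (and hence execlists) stays busy across the wakeup being timed.
   */
  for (int n = 0; n < wlen; n++)
          gem_execbuf(fd, &execbuf);
  *spin[1]->batch = cmd;   /* rearm the recycled spin loop */
  *spin[1]->running = 0;
  gem_execbuf(fd, &spin[1]->execbuf);

  this = gettime();
  igt_spin_batch_end(spin[0]);  /* let the queue drain... */
  gem_sync(fd, object.handle);  /* ...and time the wakeup */
  elapsed += gettime() - this;
  igt_swap(spin[0], spin[1]);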

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/gem_sync.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 142 insertions(+)

diff --git a/tests/gem_sync.c b/tests/gem_sync.c
index 495ca3b53..c697220ad 100644
--- a/tests/gem_sync.c
+++ b/tests/gem_sync.c
@@ -294,6 +294,144 @@ wakeup_ring(int fd, unsigned ring, int timeout, int wlen)
 	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
 }
 
+static void
+active_wakeup_ring(int fd, unsigned ring, int timeout, int wlen)
+{
+	unsigned engines[16];
+	const char *names[16];
+	int num_engines = 0;
+
+	if (ring == ALL_ENGINES) {
+		for_each_physical_engine(fd, ring) {
+			if (!gem_can_store_dword(fd, ring))
+				continue;
+
+			names[num_engines] = e__->name;
+			engines[num_engines++] = ring;
+			if (num_engines == ARRAY_SIZE(engines))
+				break;
+		}
+		igt_require(num_engines);
+	} else {
+		gem_require_ring(fd, ring);
+		igt_require(gem_can_store_dword(fd, ring));
+		names[num_engines] = NULL;
+		engines[num_engines++] = ring;
+	}
+
+	intel_detect_and_clear_missed_interrupts(fd);
+	igt_fork(child, num_engines) {
+		const uint32_t bbe = MI_BATCH_BUFFER_END;
+		struct drm_i915_gem_exec_object2 object;
+		struct drm_i915_gem_execbuffer2 execbuf;
+		double end, this, elapsed, now, baseline;
+		unsigned long cycles;
+		igt_spin_t *spin[2];
+		uint32_t cmd;
+
+		memset(&object, 0, sizeof(object));
+		object.handle = gem_create(fd, 4096);
+		gem_write(fd, object.handle, 0, &bbe, sizeof(bbe));
+
+		memset(&execbuf, 0, sizeof(execbuf));
+		execbuf.buffers_ptr = to_user_pointer(&object);
+		execbuf.buffer_count = 1;
+		execbuf.flags = engines[child % num_engines];
+
+		spin[0] = __igt_spin_batch_new(fd,
+					       .engine = execbuf.flags,
+					       .flags = (IGT_SPIN_POLL_RUN |
+							 IGT_SPIN_FAST));
+		igt_assert(spin[0]->running);
+		cmd = *spin[0]->batch;
+
+		spin[1] = __igt_spin_batch_new(fd,
+					       .engine = execbuf.flags,
+					       .flags = (IGT_SPIN_POLL_RUN |
+							 IGT_SPIN_FAST));
+
+		gem_execbuf(fd, &execbuf);
+
+		igt_spin_batch_end(spin[1]);
+		igt_spin_batch_end(spin[0]);
+		gem_sync(fd, object.handle);
+
+		for (int warmup = 0; warmup <= 1; warmup++) {
+			*spin[0]->batch = cmd;
+			*spin[0]->running = 0;
+			gem_execbuf(fd, &spin[0]->execbuf);
+
+			end = gettime() + timeout/10.;
+			elapsed = 0;
+			cycles = 0;
+			do {
+				while (!READ_ONCE(*spin[0]->running))
+					;
+
+				*spin[1]->batch = cmd;
+				*spin[1]->running = 0;
+				gem_execbuf(fd, &spin[1]->execbuf);
+
+				this = gettime();
+				igt_spin_batch_end(spin[0]);
+				gem_sync(fd, spin[0]->handle);
+				now = gettime();
+
+				elapsed += now - this;
+				cycles++;
+				igt_swap(spin[0], spin[1]);
+			} while (now < end);
+			igt_spin_batch_end(spin[0]);
+			baseline = elapsed / cycles;
+		}
+		igt_info("%s%saseline %ld cycles: %.3f us\n",
+			 names[child % num_engines] ?: "",
+			 names[child % num_engines] ? " b" : "B",
+			 cycles, elapsed*1e6/cycles);
+
+		*spin[0]->batch = cmd;
+		*spin[0]->running = 0;
+		gem_execbuf(fd, &spin[0]->execbuf);
+
+		end = gettime() + timeout;
+		elapsed = 0;
+		cycles = 0;
+		do {
+			while (!READ_ONCE(*spin[0]->running))
+				;
+
+			for (int n = 0; n < wlen; n++)
+				gem_execbuf(fd, &execbuf);
+
+			*spin[1]->batch = cmd;
+			*spin[1]->running = 0;
+			gem_execbuf(fd, &spin[1]->execbuf);
+
+			this = gettime();
+			igt_spin_batch_end(spin[0]);
+			gem_sync(fd, object.handle);
+			now = gettime();
+
+			elapsed += now - this;
+			cycles++;
+			igt_swap(spin[0], spin[1]);
+		} while (now < end);
+		igt_spin_batch_end(spin[0]);
+		elapsed -= cycles * baseline;
+
+		igt_info("%s%sompleted %ld cycles: %.3f + %.3f us\n",
+			 names[child % num_engines] ?: "",
+			 names[child % num_engines] ? " c" : "C",
+			 cycles, 1e6*baseline, elapsed*1e6/cycles);
+
+		igt_spin_batch_free(fd, spin[1]);
+		igt_spin_batch_free(fd, spin[0]);
+		gem_close(fd, object.handle);
+	}
+	igt_waitchildren_timeout(2*timeout, NULL);
+	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
 static void
 store_ring(int fd, unsigned ring, int num_children, int timeout)
 {
@@ -1018,6 +1156,8 @@ igt_main
 			idle_ring(fd, e->exec_id | e->flags, 150);
 		igt_subtest_f("wakeup-%s", e->name)
 			wakeup_ring(fd, e->exec_id | e->flags, 150, 1);
+		igt_subtest_f("active-wakeup-%s", e->name)
+			active_wakeup_ring(fd, e->exec_id | e->flags, 150, 1);
 		igt_subtest_f("double-wakeup-%s", e->name)
 			wakeup_ring(fd, e->exec_id | e->flags, 150, 2);
 		igt_subtest_f("store-%s", e->name)
@@ -1050,6 +1190,8 @@ igt_main
 		store_ring(fd, ALL_ENGINES, ncpus, 150);
 	igt_subtest("wakeup-each")
 		wakeup_ring(fd, ALL_ENGINES, 150, 1);
+	igt_subtest("active-wakeup-each")
+		active_wakeup_ring(fd, ALL_ENGINES, 150, 1);
 	igt_subtest("double-wakeup-each")
 		wakeup_ring(fd, ALL_ENGINES, 150, 2);
 
-- 
2.18.0

* [PATCH i-g-t 3/3] igt/gem_sync: Exercising waiting while keeping the GPU busy
  2018-08-10 11:01 [PATCH i-g-t 1/3] igt/gem_sync: Exercise sync after context switch Chris Wilson
  2018-08-10 11:01 ` [PATCH i-g-t 2/3] gem_sync: Measure wakeup latency while also scheduling the next batch Chris Wilson
@ 2018-08-10 11:01 ` Chris Wilson
  2018-08-10 17:41   ` [igt-dev] " Antonio Argenziano
  2018-08-14 23:50 ` [PATCH i-g-t 1/3] igt/gem_sync: Exercise sync after context switch Antonio Argenziano
  2 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2018-08-10 11:01 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

Normally we wait on the last request, but that overlooks any
difficulties in waiting on a request while the next is being queued.
Check those.
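
The gist of the new loop (a sketch only; spinner setup as in the diff
below):

  /* Two spinners ping-pong so the engine never goes idle: end one and
   * wait on it while the other still spins, then rearm and resubmit it
   * so that the next wait overlaps the next submission.
   */
  igt_spin_t *s = spin[loop & 1];

  igt_spin_batch_end(s);
  gem_sync(fd, s->handle);  /* wait with the other spinner active */

  *s->batch = cmd;          /* restore the spin instruction */
  gem_execbuf(fd, &s->execbuf);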

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/gem_sync.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/tests/gem_sync.c b/tests/gem_sync.c
index c697220ad..fb209977d 100644
--- a/tests/gem_sync.c
+++ b/tests/gem_sync.c
@@ -294,6 +294,74 @@ wakeup_ring(int fd, unsigned ring, int timeout, int wlen)
 	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
 }
 
+static void active_ring(int fd, unsigned ring, int timeout)
+{
+	unsigned engines[16];
+	const char *names[16];
+	int num_engines = 0;
+
+	if (ring == ALL_ENGINES) {
+		for_each_physical_engine(fd, ring) {
+			if (!gem_can_store_dword(fd, ring))
+				continue;
+
+			names[num_engines] = e__->name;
+			engines[num_engines++] = ring;
+			if (num_engines == ARRAY_SIZE(engines))
+				break;
+		}
+		igt_require(num_engines);
+	} else {
+		gem_require_ring(fd, ring);
+		igt_require(gem_can_store_dword(fd, ring));
+		names[num_engines] = NULL;
+		engines[num_engines++] = ring;
+	}
+
+	intel_detect_and_clear_missed_interrupts(fd);
+	igt_fork(child, num_engines) {
+		double start, end, elapsed;
+		unsigned long cycles;
+		igt_spin_t *spin[2];
+		uint32_t cmd;
+
+		spin[0] = __igt_spin_batch_new(fd,
+					       .engine = ring,
+					       .flags = IGT_SPIN_FAST);
+		cmd = *spin[0]->batch;
+
+		spin[1] = __igt_spin_batch_new(fd,
+					       .engine = ring,
+					       .flags = IGT_SPIN_FAST);
+		igt_assert(*spin[1]->batch == cmd);
+
+		start = gettime();
+		end = start + timeout;
+		cycles = 0;
+		do {
+			for (int loop = 0; loop < 1024; loop++) {
+				igt_spin_t *s = spin[loop & 1];
+
+				igt_spin_batch_end(s);
+				gem_sync(fd, s->handle);
+
+				*s->batch = cmd;
+				gem_execbuf(fd, &s->execbuf);
+			}
+			cycles += 1024;
+		} while ((elapsed = gettime()) < end);
+		igt_spin_batch_free(fd, spin[1]);
+		igt_spin_batch_free(fd, spin[0]);
+
+		igt_info("%s%sompleted %ld cycles: %.3f us\n",
+			 names[child % num_engines] ?: "",
+			 names[child % num_engines] ? " c" : "C",
+			 cycles, (elapsed - start)*1e6/cycles);
+	}
+	igt_waitchildren_timeout(2*timeout, NULL);
+	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
 static void
 active_wakeup_ring(int fd, unsigned ring, int timeout, int wlen)
 {
@@ -1154,6 +1222,8 @@ igt_main
 			sync_ring(fd, e->exec_id | e->flags, 1, 150);
 		igt_subtest_f("idle-%s", e->name)
 			idle_ring(fd, e->exec_id | e->flags, 150);
+		igt_subtest_f("active-%s", e->name)
+			active_ring(fd, e->exec_id | e->flags, 150);
 		igt_subtest_f("wakeup-%s", e->name)
 			wakeup_ring(fd, e->exec_id | e->flags, 150, 1);
 		igt_subtest_f("active-wakeup-%s", e->name)
@@ -1188,6 +1258,8 @@ igt_main
 		sync_ring(fd, ALL_ENGINES, ncpus, 150);
 	igt_subtest("forked-store-each")
 		store_ring(fd, ALL_ENGINES, ncpus, 150);
+	igt_subtest("active-each")
+		active_ring(fd, ALL_ENGINES, 150);
 	igt_subtest("wakeup-each")
 		wakeup_ring(fd, ALL_ENGINES, 150, 1);
 	igt_subtest("active-wakeup-each")
-- 
2.18.0

* Re: [igt-dev] [PATCH i-g-t 3/3] igt/gem_sync: Exercising waiting while keeping the GPU busy
  2018-08-10 11:01 ` [PATCH i-g-t 3/3] igt/gem_sync: Exercising waiting while keeping the GPU busy Chris Wilson
@ 2018-08-10 17:41   ` Antonio Argenziano
  2018-08-10 17:51     ` Chris Wilson
  0 siblings, 1 reply; 17+ messages in thread
From: Antonio Argenziano @ 2018-08-10 17:41 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev



On 10/08/18 04:01, Chris Wilson wrote:
> Normally we wait on the last request, but that overlooks any
> difficulties in waiting on a request while the next is being qeued.

/s/qeued/queued

> Check those.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   tests/gem_sync.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 72 insertions(+)
> 
> diff --git a/tests/gem_sync.c b/tests/gem_sync.c
> index c697220ad..fb209977d 100644
> --- a/tests/gem_sync.c
> +++ b/tests/gem_sync.c
> @@ -294,6 +294,74 @@ wakeup_ring(int fd, unsigned ring, int timeout, int wlen)
>   	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
>   }
>   

> +	intel_detect_and_clear_missed_interrupts(fd);
> +	igt_fork(child, num_engines) {
> +		double start, end, elapsed;
> +		unsigned long cycles;
> +		igt_spin_t *spin[2];
> +		uint32_t cmd;
> +
> +		spin[0] = __igt_spin_batch_new(fd,
> +					       .engine = ring,
> +					       .flags = IGT_SPIN_FAST);
> +		cmd = *spin[0]->batch;
> +
> +		spin[1] = __igt_spin_batch_new(fd,
> +					       .engine = ring,
> +					       .flags = IGT_SPIN_FAST);
> +		igt_assert(*spin[1]->batch == cmd);
> +
> +		start = gettime();
> +		end = start + timeout;
> +		cycles = 0;
> +		do {
> +			for (int loop = 0; loop < 1024; loop++) {
> +				igt_spin_t *s = spin[loop & 1];
> +
> +				igt_spin_batch_end(s);
> +				gem_sync(fd, s->handle);

How does the test fail if the sync goes wrong? Hang detector on the 
queued batch?

Antonio

> +
> +				*s->batch = cmd;
> +				gem_execbuf(fd, &s->execbuf);
> +			}
> +			cycles += 1024;
> +		} while ((elapsed = gettime()) < end);
> +		igt_spin_batch_free(fd, spin[1]);
> +		igt_spin_batch_free(fd, spin[0]);
> +
> +		igt_info("%s%sompleted %ld cycles: %.3f us\n",
> +			 names[child % num_engines] ?: "",
> +			 names[child % num_engines] ? " c" : "C",
> +			 cycles, (elapsed - start)*1e6/cycles);
> +	}
> +	igt_waitchildren_timeout(2*timeout, NULL);
> +	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
> +}
> +

* Re: [igt-dev] [PATCH i-g-t 3/3] igt/gem_sync: Exercising waiting while keeping the GPU busy
  2018-08-10 17:41   ` [igt-dev] " Antonio Argenziano
@ 2018-08-10 17:51     ` Chris Wilson
  2018-08-10 18:11       ` Antonio Argenziano
  0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2018-08-10 17:51 UTC (permalink / raw)
  To: Antonio Argenziano, intel-gfx; +Cc: igt-dev

Quoting Antonio Argenziano (2018-08-10 18:41:22)
> 
> 
> On 10/08/18 04:01, Chris Wilson wrote:
> > Normally we wait on the last request, but that overlooks any
> > difficulties in waiting on a request while the next is being qeued.
> 
> /s/qeued/queued
> 
> > Check those.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   tests/gem_sync.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 72 insertions(+)
> > 
> > diff --git a/tests/gem_sync.c b/tests/gem_sync.c
> > index c697220ad..fb209977d 100644
> > --- a/tests/gem_sync.c
> > +++ b/tests/gem_sync.c
> > @@ -294,6 +294,74 @@ wakeup_ring(int fd, unsigned ring, int timeout, int wlen)
> >       igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
> >   }
> >   
> 
> > +     intel_detect_and_clear_missed_interrupts(fd);
> > +     igt_fork(child, num_engines) {
> > +             double start, end, elapsed;
> > +             unsigned long cycles;
> > +             igt_spin_t *spin[2];
> > +             uint32_t cmd;
> > +
> > +             spin[0] = __igt_spin_batch_new(fd,
> > +                                            .engine = ring,
> > +                                            .flags = IGT_SPIN_FAST);
> > +             cmd = *spin[0]->batch;
> > +
> > +             spin[1] = __igt_spin_batch_new(fd,
> > +                                            .engine = ring,
> > +                                            .flags = IGT_SPIN_FAST);
> > +             igt_assert(*spin[1]->batch == cmd);
> > +
> > +             start = gettime();
> > +             end = start + timeout;
> > +             cycles = 0;
> > +             do {
> > +                     for (int loop = 0; loop < 1024; loop++) {
> > +                             igt_spin_t *s = spin[loop & 1];
> > +
> > +                             igt_spin_batch_end(s);
> > +                             gem_sync(fd, s->handle);
> 
> How does the test fail if the sync goes wrong? Hang detector on the 
> queued batch?

We have a hang detector for both missed wakeups and GPU hangs. As tests
go it's fairly tame, but in essence this entire file is about trying
to trick the HW+driver into not sending an interrupt back to userspace.
Just a very narrow stress test, over and over again from slightly
different angles.
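
Concretely, every subtest here uses the usual gem_sync.c bracket (a
sketch):

  /* Clear any previously recorded missed interrupts. */
  intel_detect_and_clear_missed_interrupts(fd);

  igt_fork(child, num_engines) {
          /* stress loop of gem_execbuf()/gem_sync(); a lost wakeup
           * leaves the child stuck inside gem_sync()
           */
  }

  /* A stuck child trips this bounded wait... */
  igt_waitchildren_timeout(2 * timeout, NULL);

  /* ...and any missed interrupt flagged by the kernel fails the test. */
  igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);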
-Chris
* Re: [igt-dev] [PATCH i-g-t 3/3] igt/gem_sync: Exercising waiting while keeping the GPU busy
  2018-08-10 17:51     ` Chris Wilson
@ 2018-08-10 18:11       ` Antonio Argenziano
  2018-08-14 18:27         ` Chris Wilson
  0 siblings, 1 reply; 17+ messages in thread
From: Antonio Argenziano @ 2018-08-10 18:11 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev



On 10/08/18 10:51, Chris Wilson wrote:
> Quoting Antonio Argenziano (2018-08-10 18:41:22)
>>
>>
>> On 10/08/18 04:01, Chris Wilson wrote:
>>> Normally we wait on the last request, but that overlooks any
>>> difficulties in waiting on a request while the next is being qeued.
>>
>> /s/qeued/queued
>>
>>> Check those.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    tests/gem_sync.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++
>>>    1 file changed, 72 insertions(+)
>>>
>>> diff --git a/tests/gem_sync.c b/tests/gem_sync.c
>>> index c697220ad..fb209977d 100644
>>> --- a/tests/gem_sync.c
>>> +++ b/tests/gem_sync.c
>>> @@ -294,6 +294,74 @@ wakeup_ring(int fd, unsigned ring, int timeout, int wlen)
>>>        igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
>>>    }
>>>    
>>
>>> +     intel_detect_and_clear_missed_interrupts(fd);
>>> +     igt_fork(child, num_engines) {
>>> +             double start, end, elapsed;
>>> +             unsigned long cycles;
>>> +             igt_spin_t *spin[2];
>>> +             uint32_t cmd;
>>> +
>>> +             spin[0] = __igt_spin_batch_new(fd,
>>> +                                            .engine = ring,
>>> +                                            .flags = IGT_SPIN_FAST);
>>> +             cmd = *spin[0]->batch;
>>> +
>>> +             spin[1] = __igt_spin_batch_new(fd,
>>> +                                            .engine = ring,
>>> +                                            .flags = IGT_SPIN_FAST);
>>> +             igt_assert(*spin[1]->batch == cmd);
>>> +
>>> +             start = gettime();
>>> +             end = start + timeout;
>>> +             cycles = 0;
>>> +             do {
>>> +                     for (int loop = 0; loop < 1024; loop++) {
>>> +                             igt_spin_t *s = spin[loop & 1];
>>> +
>>> +                             igt_spin_batch_end(s);
>>> +                             gem_sync(fd, s->handle);
>>
>> How does the test fail if the sync goes wrong? Hang detector on the
>> queued batch?
> 
> We have a hang detector for both missed wakeups and GPU hangs. As tests
> goes it's fairly tame, but in essence this entire file is about trying
> to trick the HW+driver into not sending an interrupt back to userspace.
> Just a very narrow stress test, over and over again from slightly
> different angles.

I see.

Reviewed-by: Antonio Argenziano <antonio.argenziano@intel.com>

> -Chris
> 
* Re: [igt-dev] [PATCH i-g-t 3/3] igt/gem_sync: Exercising waiting while keeping the GPU busy
  2018-08-10 18:11       ` Antonio Argenziano
@ 2018-08-14 18:27         ` Chris Wilson
  2018-08-14 18:31           ` Antonio Argenziano
  0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2018-08-14 18:27 UTC (permalink / raw)
  To: Antonio Argenziano, intel-gfx; +Cc: igt-dev

Quoting Antonio Argenziano (2018-08-10 19:11:02)
> 
> 
> On 10/08/18 10:51, Chris Wilson wrote:
> > Quoting Antonio Argenziano (2018-08-10 18:41:22)
> >> How does the test fail if the sync goes wrong? Hang detector on the
> >> queued batch?
> > 
> > We have a hang detector for both missed wakeups and GPU hangs. As tests
> > goes it's fairly tame, but in essence this entire file is about trying
> > to trick the HW+driver into not sending an interrupt back to userspace.
> > Just a very narrow stress test, over and over again from slightly
> > different angles.
> 
> I see.
> 
> Reviewed-by: Antonio Argenziano <antonio.argenziano@intel.com>

Was that a general r-b for the very similar series or just this last
patch?
-Chris
* Re: [igt-dev] [PATCH i-g-t 3/3] igt/gem_sync: Exercising waiting while keeping the GPU busy
  2018-08-14 18:27         ` Chris Wilson
@ 2018-08-14 18:31           ` Antonio Argenziano
  0 siblings, 0 replies; 17+ messages in thread
From: Antonio Argenziano @ 2018-08-14 18:31 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev



On 14/08/18 11:27, Chris Wilson wrote:
> Quoting Antonio Argenziano (2018-08-10 19:11:02)
>>
>>
>> On 10/08/18 10:51, Chris Wilson wrote:
>>> Quoting Antonio Argenziano (2018-08-10 18:41:22)
>>>> How does the test fail if the sync goes wrong? Hang detector on the
>>>> queued batch?
>>>
>>> We have a hang detector for both missed wakeups and GPU hangs. As tests
>>> goes it's fairly tame, but in essence this entire file is about trying
>>> to trick the HW+driver into not sending an interrupt back to userspace.
>>> Just a very narrow stress test, over and over again from slightly
>>> different angles.
>>
>> I see.
>>
>> Reviewed-by: Antonio Argenziano <antonio.argenziano@intel.com>
> 
> Was that a general r-b for the very similar series or just this last
> patch?

I've only read this last patch, I'll have a look at the rest.

Antonio

> -Chris
> 
* Re: [PATCH i-g-t 1/3] igt/gem_sync: Exercise sync after context switch
  2018-08-10 11:01 [PATCH i-g-t 1/3] igt/gem_sync: Exercise sync after context switch Chris Wilson
  2018-08-10 11:01 ` [PATCH i-g-t 2/3] gem_sync: Measure wakeup latency while also scheduling the next batch Chris Wilson
  2018-08-10 11:01 ` [PATCH i-g-t 3/3] igt/gem_sync: Exercising waiting while keeping the GPU busy Chris Wilson
@ 2018-08-14 23:50 ` Antonio Argenziano
  2018-08-15 10:26   ` Chris Wilson
  2 siblings, 1 reply; 17+ messages in thread
From: Antonio Argenziano @ 2018-08-14 23:50 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev



On 10/08/18 04:01, Chris Wilson wrote:
> This exercises a special case that may be of interest, waiting for a
> context that may be preempted in order to reduce the wait.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   tests/gem_sync.c | 146 +++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 146 insertions(+)
> 
> diff --git a/tests/gem_sync.c b/tests/gem_sync.c
> index 493ae61df..495ca3b53 100644
> --- a/tests/gem_sync.c
> +++ b/tests/gem_sync.c
> @@ -409,6 +409,144 @@ store_ring(int fd, unsigned ring, int num_children, int timeout)
>   	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
>   }
>   
> +static void
> +switch_ring(int fd, unsigned ring, int num_children, int timeout)
> +{
> +	const int gen = intel_gen(intel_get_drm_devid(fd));
> +	unsigned engines[16];
> +	const char *names[16];
> +	int num_engines = 0;
> +
> +	gem_require_contexts(fd);
> +
> +	if (ring == ALL_ENGINES) {
> +		for_each_physical_engine(fd, ring) {
> +			if (!gem_can_store_dword(fd, ring))
> +				continue;
> +
> +			names[num_engines] = e__->name;
> +			engines[num_engines++] = ring;
> +			if (num_engines == ARRAY_SIZE(engines))
> +				break;
> +		}
> +
> +		num_children *= num_engines;
> +	} else {
> +		gem_require_ring(fd, ring);
> +		igt_require(gem_can_store_dword(fd, ring));
> +		names[num_engines] = NULL;
> +		engines[num_engines++] = ring;
> +	}
> +
> +	intel_detect_and_clear_missed_interrupts(fd);
> +	igt_fork(child, num_children) {
> +		struct context {
> +			struct drm_i915_gem_exec_object2 object[2];
> +			struct drm_i915_gem_relocation_entry reloc[1024];
> +			struct drm_i915_gem_execbuffer2 execbuf;
> +		} contexts[2];
> +		double start, elapsed;
> +		unsigned long cycles;
> +
> +		for (int i = 0; i < ARRAY_SIZE(contexts); i++) {
> +			const uint32_t bbe = MI_BATCH_BUFFER_END;
> +			const uint32_t sz = 32 << 10;
> +			struct context *c = &contexts[i];
> +			uint32_t *batch, *b;
> +
> +			memset(&c->execbuf, 0, sizeof(c->execbuf));
> +			c->execbuf.buffers_ptr = to_user_pointer(c->object);
> +			c->execbuf.flags = engines[child % num_engines];
> +			c->execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
> +			c->execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
> +			if (gen < 6)
> +				c->execbuf.flags |= I915_EXEC_SECURE;
> +			c->execbuf.rsvd1 = gem_context_create(fd);
> +
> +			memset(c->object, 0, sizeof(c->object));
> +			c->object[0].handle = gem_create(fd, 4096);
> +			gem_write(fd, c->object[0].handle, 0, &bbe, sizeof(bbe));
> +			c->execbuf.buffer_count = 1;
> +			gem_execbuf(fd, &c->execbuf);
> +
> +			c->object[0].flags |= EXEC_OBJECT_WRITE;
> +			c->object[1].handle = gem_create(fd, sz);
> +
> +			c->object[1].relocs_ptr = to_user_pointer(c->reloc);
> +			c->object[1].relocation_count = 1024;
> +
> +			batch = gem_mmap__cpu(fd, c->object[1].handle, 0, sz,
> +					PROT_WRITE | PROT_READ);
> +			gem_set_domain(fd, c->object[1].handle,
> +					I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
> +
> +			memset(c->reloc, 0, sizeof(c->reloc));
> +			b = batch;
> +			for (int r = 0; r < 1024; r++) {
> +				uint64_t offset;
> +
> +				c->reloc[r].presumed_offset = c->object[0].offset;
> +				c->reloc[r].offset = (b - batch + 1) * sizeof(*batch);
> +				c->reloc[r].delta = r * sizeof(uint32_t);
> +				c->reloc[r].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> +				c->reloc[r].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
> +
> +				offset = c->object[0].offset + c->reloc[r].delta;
> +				*b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> +				if (gen >= 8) {
> +					*b++ = offset;
> +					*b++ = offset >> 32;
> +				} else if (gen >= 4) {
> +					*b++ = 0;
> +					*b++ = offset;
> +					c->reloc[r].offset += sizeof(*batch);
> +				} else {
> +					b[-1] -= 1;
> +					*b++ = offset;
> +				}
> +				*b++ = r;
> +				*b++ = 0x5 << 23;
> +			}
> +			*b++ = MI_BATCH_BUFFER_END;
> +			igt_assert((b - batch)*sizeof(uint32_t) < sz);
> +			munmap(batch, sz);
> +			c->execbuf.buffer_count = 2;
> +			gem_execbuf(fd, &c->execbuf);
> +			gem_sync(fd, c->object[1].handle);
> +		}
> +
> +		cycles = 0;
> +		elapsed = 0;
> +		start = gettime();
> +		do {
> +			do {
> +				double this;
> +
> +				gem_execbuf(fd, &contexts[0].execbuf);
> +				gem_execbuf(fd, &contexts[1].execbuf);

I'm not sure where the preemption, mentioned in the commit message, is 
coming in.

Antonio

> +
> +				this = gettime();
> +				gem_sync(fd, contexts[1].object[1].handle);
> +				elapsed += gettime() - this;
> +
> +				gem_sync(fd, contexts[0].object[1].handle);
> +			} while (++cycles & 1023);
> +		} while ((gettime() - start) < timeout);
> +		igt_info("%s%sompleted %ld cycles: %.3f us\n",
> +			 names[child % num_engines] ?: "",
> +			 names[child % num_engines] ? " c" : "C",
> +			 cycles, elapsed*1e6/cycles);
> +
> +		for (int i = 0; i < ARRAY_SIZE(contexts); i++) {
> +			gem_close(fd, contexts[i].object[1].handle);
> +			gem_close(fd, contexts[i].object[0].handle);
> +			gem_context_destroy(fd, contexts[i].execbuf.rsvd1);
> +		}
> +	}
> +	igt_waitchildren_timeout(timeout+10, NULL);
> +	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
> +}
> +
>   static void xchg(void *array, unsigned i, unsigned j)
>   {
>   	uint32_t *u32 = array;
> @@ -884,6 +1022,10 @@ igt_main
>   			wakeup_ring(fd, e->exec_id | e->flags, 150, 2);
>   		igt_subtest_f("store-%s", e->name)
>   			store_ring(fd, e->exec_id | e->flags, 1, 150);
> +		igt_subtest_f("switch-%s", e->name)
> +			switch_ring(fd, e->exec_id | e->flags, 1, 150);
> +		igt_subtest_f("forked-switch-%s", e->name)
> +			switch_ring(fd, e->exec_id | e->flags, ncpus, 150);
>   		igt_subtest_f("many-%s", e->name)
>   			store_many(fd, e->exec_id | e->flags, 150);
>   		igt_subtest_f("forked-%s", e->name)
> @@ -898,6 +1040,10 @@ igt_main
>   		store_ring(fd, ALL_ENGINES, 1, 5);
>   	igt_subtest("basic-many-each")
>   		store_many(fd, ALL_ENGINES, 5);
> +	igt_subtest("switch-each")
> +		switch_ring(fd, ALL_ENGINES, 1, 150);
> +	igt_subtest("forked-switch-each")
> +		switch_ring(fd, ALL_ENGINES, ncpus, 150);
>   	igt_subtest("forked-each")
>   		sync_ring(fd, ALL_ENGINES, ncpus, 150);
>   	igt_subtest("forked-store-each")
> 
* Re: [PATCH i-g-t 1/3] igt/gem_sync: Exercise sync after context switch
  2018-08-14 23:50 ` [PATCH i-g-t 1/3] igt/gem_sync: Exercise sync after context switch Antonio Argenziano
@ 2018-08-15 10:26   ` Chris Wilson
  2018-08-15 17:20     ` Antonio Argenziano
  0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2018-08-15 10:26 UTC (permalink / raw)
  To: Antonio Argenziano, intel-gfx; +Cc: igt-dev

Quoting Antonio Argenziano (2018-08-15 00:50:43)
> 
> 
> On 10/08/18 04:01, Chris Wilson wrote:
> > This exercises a special case that may be of interest, waiting for a
> > context that may be preempted in order to reduce the wait.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> > +             cycles = 0;
> > +             elapsed = 0;
> > +             start = gettime();
> > +             do {
> > +                     do {
> > +                             double this;
> > +
> > +                             gem_execbuf(fd, &contexts[0].execbuf);
> > +                             gem_execbuf(fd, &contexts[1].execbuf);
> 
> I'm not sure where the preemption, mentioned in the commit message, is 
> coming in.

Internally. I've suggested that we reorder equivalent contexts in order
to satisfy client waits earlier. So having created two independent
request queues, userspace should be oblivious to the execution order.
-Chris
* Re: [PATCH i-g-t 1/3] igt/gem_sync: Exercise sync after context switch
  2018-08-15 10:26   ` Chris Wilson
@ 2018-08-15 17:20     ` Antonio Argenziano
  2018-08-15 17:24       ` Chris Wilson
  0 siblings, 1 reply; 17+ messages in thread
From: Antonio Argenziano @ 2018-08-15 17:20 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev



On 15/08/18 03:26, Chris Wilson wrote:
> Quoting Antonio Argenziano (2018-08-15 00:50:43)
>>
>>
>> On 10/08/18 04:01, Chris Wilson wrote:
>>> This exercises a special case that may be of interest, waiting for a
>>> context that may be preempted in order to reduce the wait.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>> +             cycles = 0;
>>> +             elapsed = 0;
>>> +             start = gettime();
>>> +             do {
>>> +                     do {
>>> +                             double this;
>>> +
>>> +                             gem_execbuf(fd, &contexts[0].execbuf);
>>> +                             gem_execbuf(fd, &contexts[1].execbuf);
>>
>> I'm not sure where the preemption, mentioned in the commit message, is
>> coming in.
> 
> Internally. I've suggested that we reorder equivalent contexts in order
> to satisfy client waits earlier. So having created two independent
> request queues, userspace should be oblivious to the execution order.

But there isn't an assert because you don't want that to be part of the 
contract between the driver and userspace, is that correct?

Antonio

> -Chris
> 
* Re: [PATCH i-g-t 1/3] igt/gem_sync: Exercise sync after context switch
  2018-08-15 17:20     ` Antonio Argenziano
@ 2018-08-15 17:24       ` Chris Wilson
  2018-08-15 23:59         ` Antonio Argenziano
  0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2018-08-15 17:24 UTC (permalink / raw)
  To: Antonio Argenziano, intel-gfx; +Cc: igt-dev

Quoting Antonio Argenziano (2018-08-15 18:20:10)
> 
> 
> On 15/08/18 03:26, Chris Wilson wrote:
> > Quoting Antonio Argenziano (2018-08-15 00:50:43)
> >>
> >>
> >> On 10/08/18 04:01, Chris Wilson wrote:
> >>> This exercises a special case that may be of interest, waiting for a
> >>> context that may be preempted in order to reduce the wait.
> >>>
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> ---
> >>> +             cycles = 0;
> >>> +             elapsed = 0;
> >>> +             start = gettime();
> >>> +             do {
> >>> +                     do {
> >>> +                             double this;
> >>> +
> >>> +                             gem_execbuf(fd, &contexts[0].execbuf);
> >>> +                             gem_execbuf(fd, &contexts[1].execbuf);
> >>
> >> I'm not sure where the preemption, mentioned in the commit message, is
> >> coming in.
> > 
> > Internally. I've suggested that we reorder equivalent contexts in order
> > to satisfy client waits earlier. So having created two independent
> > request queues, userspace should be oblivious to the execution order.
> 
> But there isn't an assert because you don't want that to be part of the 
> contract between the driver and userspace, is that correct?

Correct. Userspace hasn't specified an order between the two contexts so
can't actually assert it happens in a particular order. We are free then
to do whatever we like, but that also means no assertion. Just the
figures look pretty and ofc we have to check that nothing actually
breaks.
-Chris
* Re: [PATCH i-g-t 1/3] igt/gem_sync: Exercise sync after context switch
  2018-08-15 17:24       ` Chris Wilson
@ 2018-08-15 23:59         ` Antonio Argenziano
  2018-08-16  7:08           ` Chris Wilson
  0 siblings, 1 reply; 17+ messages in thread
From: Antonio Argenziano @ 2018-08-15 23:59 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev



On 15/08/18 10:24, Chris Wilson wrote:
> Quoting Antonio Argenziano (2018-08-15 18:20:10)
>>
>>
>> On 15/08/18 03:26, Chris Wilson wrote:
>>> Quoting Antonio Argenziano (2018-08-15 00:50:43)
>>>>
>>>>
>>>> On 10/08/18 04:01, Chris Wilson wrote:
>>>>> This exercises a special case that may be of interest, waiting for a
>>>>> context that may be preempted in order to reduce the wait.
>>>>>
>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> ---
>>>>> +             cycles = 0;
>>>>> +             elapsed = 0;
>>>>> +             start = gettime();
>>>>> +             do {
>>>>> +                     do {
>>>>> +                             double this;
>>>>> +
>>>>> +                             gem_execbuf(fd, &contexts[0].execbuf);
>>>>> +                             gem_execbuf(fd, &contexts[1].execbuf);
>>>>
>>>> I'm not sure where the preemption, mentioned in the commit message, is
>>>> coming in.
>>>
>>> Internally. I've suggested that we reorder equivalent contexts in order
>>> to satisfy client waits earlier. So having created two independent
>>> request queues, userspace should be oblivious to the execution order.
>>
>> But there isn't an assert because you don't want that to be part of the
>> contract between the driver and userspace, is that correct?
> 
> Correct. Userspace hasn't specified an order between the two contexts so
> can't actually assert it happens in a particular order. We are free then
> to do whatever we like, but that also means no assertion. Just the
> figures look pretty and ofc we have to check that nothing actually
> breaks.

The last question I have is about the batches: why not choose a spin
batch, so as to make sure that context[0] (and [1]) hasn't completed by
the time it starts waiting?

Antonio

> -Chris
> 
* Re: [PATCH i-g-t 1/3] igt/gem_sync: Exercise sync after context switch
  2018-08-15 23:59         ` Antonio Argenziano
@ 2018-08-16  7:08           ` Chris Wilson
  2018-08-16 17:42             ` Antonio Argenziano
  0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2018-08-16  7:08 UTC (permalink / raw)
  To: Antonio Argenziano, intel-gfx; +Cc: igt-dev

Quoting Antonio Argenziano (2018-08-16 00:59:30)
> 
> 
> On 15/08/18 10:24, Chris Wilson wrote:
> > Quoting Antonio Argenziano (2018-08-15 18:20:10)
> >>
> >>
> >> On 15/08/18 03:26, Chris Wilson wrote:
> >>> Quoting Antonio Argenziano (2018-08-15 00:50:43)
> >>>>
> >>>>
> >>>> On 10/08/18 04:01, Chris Wilson wrote:
> >>>>> This exercises a special case that may be of interest, waiting for a
> >>>>> context that may be preempted in order to reduce the wait.
> >>>>>
> >>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>>>> ---
> >>>>> +             cycles = 0;
> >>>>> +             elapsed = 0;
> >>>>> +             start = gettime();
> >>>>> +             do {
> >>>>> +                     do {
> >>>>> +                             double this;
> >>>>> +
> >>>>> +                             gem_execbuf(fd, &contexts[0].execbuf);
> >>>>> +                             gem_execbuf(fd, &contexts[1].execbuf);
> >>>>
> >>>> I'm not sure where the preemption, mentioned in the commit message, is
> >>>> coming in.
> >>>
> >>> Internally. I've suggested that we reorder equivalent contexts in order
> >>> to satisfy client waits earlier. So having created two independent
> >>> request queues, userspace should be oblivious to the execution order.
> >>
> >> But there isn't an assert because you don't want that to be part of the
> >> contract between the driver and userspace, is that correct?
> > 
> > Correct. Userspace hasn't specified an order between the two contexts so
> > can't actually assert it happens in a particular order. We are free then
> > to do whatever we like, but that also means no assertion. Just the
> > figures look pretty and ofc we have to check that nothing actually
> > breaks.
> 
> The last question I have is about the batches, why not choosing a spin 
> batch so to make sure that context[0] (and [1]) hasn't completed by the 
> time it starts waiting.

It would be exercising fewer possibilities. Not that it would be any
less valid. (If I can't do a pair of trivial execbuf faster than the gpu
can execute a no-op from idle, shoot me. Each execbuf will take ~500ns,
the gpu will take 20-50us [bdw-kbl] to execute the first batch from idle.)
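
(For scale, a hypothetical micro-measurement of that claim, reusing the
test's contexts; the figures in the comments are the estimates above,
not guarantees:)

  double t0, t1, t2;

  t0 = gettime();
  gem_execbuf(fd, &contexts[0].execbuf);
  gem_execbuf(fd, &contexts[1].execbuf);
  t1 = gettime();  /* ~1us for both submissions at ~500ns each */

  gem_sync(fd, contexts[0].object[1].handle);
  t2 = gettime();  /* first batch from idle: ~20-50us [bdw-kbl] */

  igt_info("submit %.3fus, first completion %.3fus\n",
           (t1 - t0) * 1e6, (t2 - t0) * 1e6);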
-Chris
* Re: [PATCH i-g-t 1/3] igt/gem_sync: Exercise sync after context switch
  2018-08-16  7:08           ` Chris Wilson
@ 2018-08-16 17:42             ` Antonio Argenziano
  2018-08-16 17:48               ` Chris Wilson
  0 siblings, 1 reply; 17+ messages in thread
From: Antonio Argenziano @ 2018-08-16 17:42 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev



On 16/08/18 00:08, Chris Wilson wrote:
> Quoting Antonio Argenziano (2018-08-16 00:59:30)
>>
>>
>> On 15/08/18 10:24, Chris Wilson wrote:
>>> Quoting Antonio Argenziano (2018-08-15 18:20:10)
>>>>
>>>>
>>>> On 15/08/18 03:26, Chris Wilson wrote:
>>>>> Quoting Antonio Argenziano (2018-08-15 00:50:43)
>>>>>>
>>>>>>
>>>>>> On 10/08/18 04:01, Chris Wilson wrote:
>>>>>>> This exercises a special case that may be of interest, waiting for a
>>>>>>> context that may be preempted in order to reduce the wait.
>>>>>>>
>>>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>>>> ---
>>>>>>> +             cycles = 0;
>>>>>>> +             elapsed = 0;
>>>>>>> +             start = gettime();
>>>>>>> +             do {
>>>>>>> +                     do {
>>>>>>> +                             double this;
>>>>>>> +
>>>>>>> +                             gem_execbuf(fd, &contexts[0].execbuf);
>>>>>>> +                             gem_execbuf(fd, &contexts[1].execbuf);
>>>>>>
>>>>>> I'm not sure where the preemption, mentioned in the commit message, is
>>>>>> coming in.
>>>>>
>>>>> Internally. I've suggested that we reorder equivalent contexts in order
>>>>> to satisfy client waits earlier. So having created two independent
>>>>> request queues, userspace should be oblivious to the execution order.
>>>>
>>>> But there isn't an assert because you don't want that to be part of the
>>>> contract between the driver and userspace, is that correct?
>>>
>>> Correct. Userspace hasn't specified an order between the two contexts so
>>> can't actually assert it happens in a particular order. We are free then
>>> to do whatever we like, but that also means no assertion. Just the
>>> figures look pretty and ofc we have to check that nothing actually
>>> breaks.
>>
>> The last question I have is about the batches, why not choosing a spin
>> batch so to make sure that context[0] (and [1]) hasn't completed by the
>> time it starts waiting.
> 
> It would be exercising fewer possibilities. Not that it would be any
> less valid. (If I can't do a pair of trivial execbuf faster than the gpu
> can execute a no-op from idle, shoot me. Each execbuf will take ~500ns,
> the gpu will take 20-50us [bdw-kbl] to execute the first batch from idle.)

It would generate some odd-looking numbers anyway.

Reviewed-by: Antonio Argenziano <antonio.argenziano@intel.com>

> -Chris
> 
* Re: [PATCH i-g-t 1/3] igt/gem_sync: Exercise sync after context switch
  2018-08-16 17:42             ` Antonio Argenziano
@ 2018-08-16 17:48               ` Chris Wilson
  0 siblings, 0 replies; 17+ messages in thread
From: Chris Wilson @ 2018-08-16 17:48 UTC (permalink / raw)
  To: Antonio Argenziano, intel-gfx; +Cc: igt-dev

Quoting Antonio Argenziano (2018-08-16 18:42:17)
> 
> 
> On 16/08/18 00:08, Chris Wilson wrote:
> > Quoting Antonio Argenziano (2018-08-16 00:59:30)
> >>
> >>
> >> On 15/08/18 10:24, Chris Wilson wrote:
> >>> Quoting Antonio Argenziano (2018-08-15 18:20:10)
> >>>>
> >>>>
> >>>> On 15/08/18 03:26, Chris Wilson wrote:
> >>>>> Quoting Antonio Argenziano (2018-08-15 00:50:43)
> >>>>>>
> >>>>>>
> >>>>>> On 10/08/18 04:01, Chris Wilson wrote:
> >>>>>>> This exercises a special case that may be of interest, waiting for a
> >>>>>>> context that may be preempted in order to reduce the wait.
> >>>>>>>
> >>>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>>>>>> ---
> >>>>>>> +             cycles = 0;
> >>>>>>> +             elapsed = 0;
> >>>>>>> +             start = gettime();
> >>>>>>> +             do {
> >>>>>>> +                     do {
> >>>>>>> +                             double this;
> >>>>>>> +
> >>>>>>> +                             gem_execbuf(fd, &contexts[0].execbuf);
> >>>>>>> +                             gem_execbuf(fd, &contexts[1].execbuf);
> >>>>>>
> >>>>>> I'm not sure where the preemption, mentioned in the commit message, is
> >>>>>> coming in.
> >>>>>
> >>>>> Internally. I've suggested that we reorder equivalent contexts in order
> >>>>> to satisfy client waits earlier. So having created two independent
> >>>>> request queues, userspace should be oblivious to the execution order.
> >>>>
> >>>> But there isn't an assert because you don't want that to be part of the
> >>>> contract between the driver and userspace, is that correct?
> >>>
> >>> Correct. Userspace hasn't specified an order between the two contexts so
> >>> can't actually assert it happens in a particular order. We are free then
> >>> to do whatever we like, but that also means no assertion. Just the
> >>> figures look pretty and ofc we have to check that nothing actually
> >>> breaks.
> >>
> >> The last question I have is about the batches, why not choosing a spin
> >> batch so to make sure that context[0] (and [1]) hasn't completed by the
> >> time it starts waiting.
> > 
> > It would be exercising fewer possibilities. Not that it would be any
> > less valid. (If I can't do a pair of trivial execbuf faster than the gpu
> > can execute a no-op from idle, shoot me. Each execbuf will take ~500ns,
> > the gpu will take 20-50us [bdw-kbl] to execute the first batch from idle.)
> 
> It would generate some odd looking numbers anyways.

It would give an indirect measure of preemption latency. I think we have
a slightly better measure via gem_exec_latency, but it's an interesting
variation at least. Certainly deserves to be in the magic cookbook of
the ultimate microbenchmarks.

Too much magic, not enough casting, alas.
-Chris
* Re: [igt-dev] [PATCH i-g-t 2/3] gem_sync: Measure wakeup latency while also scheduling the next batch
  2018-08-10 11:01 ` [PATCH i-g-t 2/3] gem_sync: Measure wakeup latency while also scheduling the next batch Chris Wilson
@ 2018-08-17 17:10   ` Antonio Argenziano
  0 siblings, 0 replies; 17+ messages in thread
From: Antonio Argenziano @ 2018-08-17 17:10 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev



On 10/08/18 04:01, Chris Wilson wrote:
> More variants on stress waits to serve the dual purpose of investigating
> different aspects of the latency (this time while also serving
> execlists interrupts) while also checking that we never miss the wakeup.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   tests/gem_sync.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 142 insertions(+)
> 
> diff --git a/tests/gem_sync.c b/tests/gem_sync.c
> index 495ca3b53..c697220ad 100644
> --- a/tests/gem_sync.c
> +++ b/tests/gem_sync.c
> @@ -294,6 +294,144 @@ wakeup_ring(int fd, unsigned ring, int timeout, int wlen)
>   	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
>   }
>   
> +static void
> +active_wakeup_ring(int fd, unsigned ring, int timeout, int wlen)
> +{
> +	unsigned engines[16];
> +	const char *names[16];
> +	int num_engines = 0;
> +
> +	if (ring == ALL_ENGINES) {
> +		for_each_physical_engine(fd, ring) {
> +			if (!gem_can_store_dword(fd, ring))
> +				continue;
> +
> +			names[num_engines] = e__->name;
> +			engines[num_engines++] = ring;
> +			if (num_engines == ARRAY_SIZE(engines))

If num_engines ever reaches ARRAY_SIZE(engines), it is time to get a
bigger array :). I think a require/assert would actually force someone
to make the change (otherwise no one will ever do it).

> +				break;
> +		}
> +		igt_require(num_engines);
> +	} else {
> +		gem_require_ring(fd, ring);
> +		igt_require(gem_can_store_dword(fd, ring));
> +		names[num_engines] = NULL;
> +		engines[num_engines++] = ring;
> +	}
> +
> +	intel_detect_and_clear_missed_interrupts(fd);
> +	igt_fork(child, num_engines) {
> +		const uint32_t bbe = MI_BATCH_BUFFER_END;
> +		struct drm_i915_gem_exec_object2 object;
> +		struct drm_i915_gem_execbuffer2 execbuf;
> +		double end, this, elapsed, now, baseline;
> +		unsigned long cycles;
> +		igt_spin_t *spin[2];
> +		uint32_t cmd;
> +
> +		memset(&object, 0, sizeof(object));
> +		object.handle = gem_create(fd, 4096);
> +		gem_write(fd, object.handle, 0, &bbe, sizeof(bbe));
> +
> +		memset(&execbuf, 0, sizeof(execbuf));
> +		execbuf.buffers_ptr = to_user_pointer(&object);
> +		execbuf.buffer_count = 1;
> +		execbuf.flags = engines[child % num_engines];
> +
> +		spin[0] = __igt_spin_batch_new(fd,
> +					       .engine = execbuf.flags,
> +					       .flags = (IGT_SPIN_POLL_RUN |
> +							 IGT_SPIN_FAST));
> +		igt_assert(spin[0]->running);
> +		cmd = *spin[0]->batch;
> +
> +		spin[1] = __igt_spin_batch_new(fd,
> +					       .engine = execbuf.flags,
> +					       .flags = (IGT_SPIN_POLL_RUN |
> +							 IGT_SPIN_FAST));
> +
> +		gem_execbuf(fd, &execbuf);
> +
> +		igt_spin_batch_end(spin[1]);
> +		igt_spin_batch_end(spin[0]);
> +		gem_sync(fd, object.handle);
> +
> +		for (int warmup = 0; warmup <= 1; warmup++) {
> +			*spin[0]->batch = cmd;
> +			*spin[0]->running = 0;
> +			gem_execbuf(fd, &spin[0]->execbuf);
> +
> +			end = gettime() + timeout/10.;
> +			elapsed = 0;
> +			cycles = 0;
> +			do {
> +				while (!READ_ONCE(*spin[0]->running))
> +					;
> +
> +				*spin[1]->batch = cmd;
> +				*spin[1]->running = 0;
> +				gem_execbuf(fd, &spin[1]->execbuf);
> +
> +				this = gettime();
> +				igt_spin_batch_end(spin[0]);
> +				gem_sync(fd, spin[0]->handle);
> +				now = gettime();
> +
> +				elapsed += now - this;
> +				cycles++;
> +				igt_swap(spin[0], spin[1]);
> +			} while (now < end);
> +			igt_spin_batch_end(spin[0]);
> +			baseline = elapsed / cycles;
> +		}
> +		igt_info("%s%saseline %ld cycles: %.3f us\n",
> +			 names[child % num_engines] ?: "",
> +			 names[child % num_engines] ? " b" : "B",
> +			 cycles, elapsed*1e6/cycles);
> +
> +		*spin[0]->batch = cmd;
> +		*spin[0]->running = 0;
> +		gem_execbuf(fd, &spin[0]->execbuf);
> +
> +		end = gettime() + timeout;
> +		elapsed = 0;
> +		cycles = 0;
> +		do {
> +			while (!READ_ONCE(*spin[0]->running))
> +				;
> +
> +			for (int n = 0; n < wlen; n++)
> +				gem_execbuf(fd, &execbuf);
> +
> +			*spin[1]->batch = cmd;
> +			*spin[1]->running = 0;
> +			gem_execbuf(fd, &spin[1]->execbuf);
> +
> +			this = gettime();
> +			igt_spin_batch_end(spin[0]);
> +			gem_sync(fd, object.handle);

It looks like all the requests are on the same ctx; do you care about
that? We should still service the interrupt after each request, so it
should be fine.

Thanks,
Antonio

> +			now = gettime();
> +
> +			elapsed += now - this;
> +			cycles++;
> +			igt_swap(spin[0], spin[1]);
> +		} while (now < end);
> +		igt_spin_batch_end(spin[0]);
> +		elapsed -= cycles * baseline;
> +
> +		igt_info("%s%sompleted %ld cycles: %.3f + %.3f us\n",
> +			 names[child % num_engines] ?: "",
> +			 names[child % num_engines] ? " c" : "C",
> +			 cycles, 1e6*baseline, elapsed*1e6/cycles);
> +
> +		igt_spin_batch_free(fd, spin[1]);
> +		igt_spin_batch_free(fd, spin[0]);
> +		gem_close(fd, object.handle);
> +	}
> +	igt_waitchildren_timeout(2*timeout, NULL);
> +	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
> +}
> +
>   static void
>   store_ring(int fd, unsigned ring, int num_children, int timeout)
>   {
> @@ -1018,6 +1156,8 @@ igt_main
>   			idle_ring(fd, e->exec_id | e->flags, 150);
>   		igt_subtest_f("wakeup-%s", e->name)
>   			wakeup_ring(fd, e->exec_id | e->flags, 150, 1);
> +		igt_subtest_f("active-wakeup-%s", e->name)
> +			active_wakeup_ring(fd, e->exec_id | e->flags, 150, 1);
>   		igt_subtest_f("double-wakeup-%s", e->name)
>   			wakeup_ring(fd, e->exec_id | e->flags, 150, 2);
>   		igt_subtest_f("store-%s", e->name)
> @@ -1050,6 +1190,8 @@ igt_main
>   		store_ring(fd, ALL_ENGINES, ncpus, 150);
>   	igt_subtest("wakeup-each")
>   		wakeup_ring(fd, ALL_ENGINES, 150, 1);
> +	igt_subtest("active-wakeup-each")
> +		active_wakeup_ring(fd, ALL_ENGINES, 150, 1);
>   	igt_subtest("double-wakeup-each")
>   		wakeup_ring(fd, ALL_ENGINES, 150, 2);
>   
> 