From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Cc: igt-dev@lists.freedesktop.org, matthew.auld@intel.com,
tvrtko.ursulin@intel.com
Subject: Re: [igt-dev] [Intel-gfx] [PATCH i-g-t 2/3] i915/gem_cpu_reloc: Use a self-modifying chained batch
Date: Wed, 16 Jan 2019 16:22:59 +0200 [thread overview]
Message-ID: <87fttsy94s.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <20190116093509.30195-2-chris@chris-wilson.co.uk>
Chris Wilson <chris@chris-wilson.co.uk> writes:
> Use another sensitive CPU reloc to emit a chained batch from inside the
> updated buffer to reduce the workload on slow machines to fit within the
> CI timeout.
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=108248
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> tests/i915/gem_cpu_reloc.c | 347 ++++++++++++++++++++-----------------
> 1 file changed, 189 insertions(+), 158 deletions(-)
>
> diff --git a/tests/i915/gem_cpu_reloc.c b/tests/i915/gem_cpu_reloc.c
> index 882c312d4..33c4e4e0f 100644
> --- a/tests/i915/gem_cpu_reloc.c
> +++ b/tests/i915/gem_cpu_reloc.c
> @@ -59,214 +59,245 @@
>
> #include "intel_bufmgr.h"
>
> -IGT_TEST_DESCRIPTION("Test the relocations through the CPU domain.");
> +#define MI_INSTR(opcode, flags) ((opcode) << 23 | (flags))
>
> -static uint32_t use_blt;
> +IGT_TEST_DESCRIPTION("Test the relocations through the CPU domain.");
>
> -static void copy(int fd, uint32_t batch, uint32_t src, uint32_t dst)
> +static uint32_t *
> +gen2_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
> {
> - struct drm_i915_gem_execbuffer2 execbuf;
> - struct drm_i915_gem_relocation_entry gem_reloc[2];
> - struct drm_i915_gem_exec_object2 gem_exec[3];
> -
> - gem_reloc[0].offset = 4 * sizeof(uint32_t);
> - gem_reloc[0].delta = 0;
> - gem_reloc[0].target_handle = dst;
> - gem_reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
> - gem_reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
> - gem_reloc[0].presumed_offset = -1;
> -
> - gem_reloc[1].offset = 7 * sizeof(uint32_t);
> - if (intel_gen(intel_get_drm_devid(fd)) >= 8)
> - gem_reloc[1].offset += sizeof(uint32_t);
> - gem_reloc[1].delta = 0;
> - gem_reloc[1].target_handle = src;
> - gem_reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
> - gem_reloc[1].write_domain = 0;
> - gem_reloc[1].presumed_offset = -1;
> -
> - memset(gem_exec, 0, sizeof(gem_exec));
> - gem_exec[0].handle = src;
> - gem_exec[1].handle = dst;
> - gem_exec[2].handle = batch;
> - gem_exec[2].relocation_count = 2;
> - gem_exec[2].relocs_ptr = to_user_pointer(gem_reloc);
> -
> - memset(&execbuf, 0, sizeof(execbuf));
> - execbuf.buffers_ptr = to_user_pointer(gem_exec);
> - execbuf.buffer_count = 3;
> - execbuf.batch_len = 4096;
> - execbuf.flags = use_blt;
> -
> - gem_execbuf(fd, &execbuf);
> + *cs++ = MI_STORE_DWORD_IMM - 1;
> + addr->offset += sizeof(*cs);
> + cs += 1; /* addr */
> + cs += 1; /* value: implicit 0xffffffff */
> + return cs;
> +}
> +static uint32_t *
> +gen4_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
> +{
> + *cs++ = MI_STORE_DWORD_IMM;
> + *cs++ = 0;
> + addr->offset += 2 * sizeof(*cs);
> + cs += 1; /* addr */
> + cs += 1; /* value: implicit 0xffffffff */
> + return cs;
> +}
> +static uint32_t *
> +gen8_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
> +{
> + *cs++ = (MI_STORE_DWORD_IMM | 1 << 21) + 1;
> + addr->offset += sizeof(*cs);
> + igt_assert((addr->delta & 7) == 0);
> + cs += 2; /* addr */
> + cs += 2; /* value: implicit 0xffffffffffffffff */
> + return cs;
> }
>
> -static void exec(int fd, uint32_t handle)
> +static uint32_t *
> +gen2_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
> {
> - struct drm_i915_gem_execbuffer2 execbuf;
> - struct drm_i915_gem_exec_object2 gem_exec;
> + *cs++ = MI_BATCH_BUFFER_START | 2 << 6;
> + reloc->offset += sizeof(*cs);
> + reloc->delta += 1;
> + cs += 1; /* addr */
> + return cs;
> +}
> +static uint32_t *
> +gen4_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
> +{
> + *cs++ = MI_BATCH_BUFFER_START | 2 << 6 | 1 << 8;
> + reloc->offset += sizeof(*cs);
> + cs += 1; /* addr */
> + return cs;
> +}
> +static uint32_t *
> +gen6_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
> +{
> + *cs++ = MI_BATCH_BUFFER_START | 1 << 8;
> + reloc->offset += sizeof(*cs);
> + cs += 1; /* addr */
> + return cs;
> +}
> +static uint32_t *
> +hsw_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
> +{
> + *cs++ = MI_BATCH_BUFFER_START | 2 << 6 | 1 << 8 | 1 << 13;
> + reloc->offset += sizeof(*cs);
> + cs += 1; /* addr */
> + return cs;
> +}
> +static uint32_t *
> +gen8_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
> +{
> + if (((uintptr_t)cs & 7) == 0) {
> + *cs++ = MI_NOOP; /* align addr for MI_STORE_DWORD_IMM */
> + reloc->offset += sizeof(*cs);
> + }
Is the intent to align it so that, after the bb start is emitted, the
address ends up with the right alignment? Otherwise it looks like the
condition should have a '!' in it.
-Mika
>
> - memset(&gem_exec, 0, sizeof(gem_exec));
> - gem_exec.handle = handle;
> + *cs++ = MI_BATCH_BUFFER_START + 1;
> + reloc->offset += sizeof(*cs);
> + cs += 2; /* addr */
>
> - memset(&execbuf, 0, sizeof(execbuf));
> - execbuf.buffers_ptr = to_user_pointer(&gem_exec);
> - execbuf.buffer_count = 1;
> - execbuf.batch_len = 4096;
> + return cs;
> +}
>
> - gem_execbuf(fd, &execbuf);
> +static void *
> +create_tmpl(int i915, struct drm_i915_gem_relocation_entry *reloc)
> +{
> + const uint32_t devid = intel_get_drm_devid(i915);
> + const int gen = intel_gen(devid);
> + uint32_t *(*emit_store_addr)(uint32_t *cs,
> + struct drm_i915_gem_relocation_entry *addr);
> + uint32_t *(*emit_bb_start)(uint32_t *cs,
> + struct drm_i915_gem_relocation_entry *reloc);
> + void *tmpl;
> +
> + /* could use BLT_FILL instead for gen2 */
> + igt_require(gem_can_store_dword(i915, 0));
> +
> + if (gen >= 8)
> + emit_store_addr = gen8_emit_store_addr;
> + else if (gen >= 4)
> + emit_store_addr = gen4_emit_store_addr;
> + else
> + emit_store_addr = gen2_emit_store_addr;
> +
> + if (gen >= 8)
> + emit_bb_start = gen8_emit_bb_start;
> + else if (IS_HASWELL(devid))
> + emit_bb_start = hsw_emit_bb_start;
> + else if (gen >= 6)
> + emit_bb_start = gen6_emit_bb_start;
> + else if (gen >= 4)
> + emit_bb_start = gen4_emit_bb_start;
> + else
> + emit_bb_start = gen2_emit_bb_start;
> +
> + tmpl = malloc(4096);
> + igt_assert(tmpl);
> + memset(tmpl, 0xff, 4096);
> +
> + /* Jump over the booby traps to the end */
> + reloc[0].delta = 64;
> + emit_bb_start(tmpl, &reloc[0]);
> +
> + /* Restore the bad address to catch missing relocs */
> + reloc[1].offset = 64;
> + reloc[1].delta = reloc[0].offset;
> + *emit_store_addr(tmpl + 64, &reloc[1]) = MI_BATCH_BUFFER_END;
> +
> + return tmpl;
> }
>
> -uint32_t gen6_batch[] = {
> - (XY_SRC_COPY_BLT_CMD | 6 |
> - XY_SRC_COPY_BLT_WRITE_ALPHA |
> - XY_SRC_COPY_BLT_WRITE_RGB),
> - (3 << 24 | /* 32 bits */
> - 0xcc << 16 | /* copy ROP */
> - 4096),
> - 0 << 16 | 0, /* dst x1, y1 */
> - 1 << 16 | 2,
> - 0, /* dst relocation */
> - 0 << 16 | 0, /* src x1, y1 */
> - 4096,
> - 0, /* src relocation */
> - MI_BATCH_BUFFER_END,
> -};
> -
> -uint32_t gen8_batch[] = {
> - (XY_SRC_COPY_BLT_CMD | 8 |
> - XY_SRC_COPY_BLT_WRITE_ALPHA |
> - XY_SRC_COPY_BLT_WRITE_RGB),
> - (3 << 24 | /* 32 bits */
> - 0xcc << 16 | /* copy ROP */
> - 4096),
> - 0 << 16 | 0, /* dst x1, y1 */
> - 1 << 16 | 2,
> - 0, /* dst relocation */
> - 0, /* FIXME */
> - 0 << 16 | 0, /* src x1, y1 */
> - 4096,
> - 0, /* src relocation */
> - 0, /* FIXME */
> - MI_BATCH_BUFFER_END,
> -};
> -
> -uint32_t *batch = gen6_batch;
> -uint32_t batch_size = sizeof(gen6_batch);
> -
> -static void run_test(int fd, int count)
> +static void run_test(int i915, int count)
> {
> - const uint32_t hang[] = {-1, -1, -1, -1};
> - const uint32_t end[] = {MI_BATCH_BUFFER_END, 0};
> - uint32_t noop;
> - uint32_t *handles;
> - int i;
> + struct drm_i915_gem_execbuffer2 execbuf;
> + struct drm_i915_gem_relocation_entry reloc[2];
> + struct drm_i915_gem_exec_object2 obj;
>
> - noop = intel_get_drm_devid(fd);
> + uint32_t *handles;
> + uint32_t *tmpl;
>
> - use_blt = 0;
> - if (intel_gen(noop) >= 6)
> - use_blt = I915_EXEC_BLT;
> + handles = malloc(count * sizeof(uint32_t));
> + igt_assert(handles);
>
> - if (intel_gen(noop) >= 8) {
> - batch = gen8_batch;
> - batch_size += 2 * 4;
> + memset(reloc, 0, sizeof(reloc));
> + tmpl = create_tmpl(i915, reloc);
> + for (int i = 0; i < count; i++) {
> + handles[i] = gem_create(i915, 4096);
> + gem_write(i915, handles[i], 0, tmpl, 4096);
> }
> + free(tmpl);
>
> - handles = malloc (count * sizeof(uint32_t));
> - igt_assert(handles);
> + memset(&obj, 0, sizeof(obj));
> + obj.relocs_ptr = to_user_pointer(reloc);
> + obj.relocation_count = ARRAY_SIZE(reloc);
>
> - noop = gem_create(fd, 4096);
> - gem_write(fd, noop, 0, end, sizeof(end));
> + memset(&execbuf, 0, sizeof(execbuf));
> + execbuf.buffers_ptr = to_user_pointer(&obj);
> + execbuf.buffer_count = 1;
>
> /* fill the entire gart with batches and run them */
> - for (i = 0; i < count; i++) {
> - uint32_t bad;
> -
> - handles[i] = gem_create(fd, 4096);
> - gem_write(fd, handles[i], 0, batch, batch_size);
> -
> - bad = gem_create(fd, 4096);
> - gem_write(fd, bad, 0, hang, sizeof(hang));
> - gem_write(fd, bad, 4096-sizeof(end), end, sizeof(end));
> + for (int i = 0; i < count; i++) {
> + obj.handle = handles[i];
>
> - /* launch the newly created batch */
> - copy(fd, handles[i], noop, bad);
> - exec(fd, bad);
> - gem_close(fd, bad);
> + reloc[0].target_handle = obj.handle;
> + reloc[0].presumed_offset = -1;
> + reloc[1].target_handle = obj.handle;
> + reloc[1].presumed_offset = -1;
>
> - igt_progress("gem_cpu_reloc: ", i, 2*count);
> + gem_execbuf(i915, &execbuf);
> }
>
> /* And again in reverse to try and catch the relocation code out */
> - for (i = 0; i < count; i++) {
> - uint32_t bad;
> + for (int i = 0; i < count; i++) {
> + obj.handle = handles[count - i - 1];
>
> - bad = gem_create(fd, 4096);
> - gem_write(fd, bad, 0, hang, sizeof(hang));
> - gem_write(fd, bad, 4096-sizeof(end), end, sizeof(end));
> + reloc[0].target_handle = obj.handle;
> + reloc[0].presumed_offset = -1;
> + reloc[1].target_handle = obj.handle;
> + reloc[1].presumed_offset = -1;
>
> - /* launch the newly created batch */
> - copy(fd, handles[count-i-1], noop, bad);
> - exec(fd, bad);
> - gem_close(fd, bad);
> -
> - igt_progress("gem_cpu_reloc: ", count+i, 3*count);
> + gem_execbuf(i915, &execbuf);
> }
>
> - /* Third time lucky? */
> - for (i = 0; i < count; i++) {
> - uint32_t bad;
> + /* Third time unlucky? */
> + for (int i = 0; i < count; i++) {
> + obj.handle = handles[i];
>
> - bad = gem_create(fd, 4096);
> - gem_write(fd, bad, 0, hang, sizeof(hang));
> - gem_write(fd, bad, 4096-sizeof(end), end, sizeof(end));
> + reloc[0].target_handle = obj.handle;
> + reloc[0].presumed_offset = -1;
> + reloc[1].target_handle = obj.handle;
> + reloc[1].presumed_offset = -1;
>
> - /* launch the newly created batch */
> - gem_set_domain(fd, handles[i],
> - I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
> - copy(fd, handles[i], noop, bad);
> - exec(fd, bad);
> - gem_close(fd, bad);
> + gem_set_domain(i915, obj.handle,
> + I915_GEM_DOMAIN_CPU,
> + I915_GEM_DOMAIN_CPU);
>
> - igt_progress("gem_cpu_reloc: ", 2*count+i, 3*count);
> + gem_execbuf(i915, &execbuf);
> }
>
> - igt_info("Subtest suceeded, cleanup up - this might take a while.\n");
> - for (i = 0; i < count; i++) {
> - gem_close(fd, handles[i]);
> - }
> - gem_close(fd, noop);
> + for (int i = 0; i < count; i++)
> + gem_close(i915, handles[i]);
> free(handles);
> }
>
> igt_main
> {
> - uint64_t aper_size;
> - int fd, count;
> + int i915;
>
> igt_fixture {
> - fd = drm_open_driver(DRIVER_INTEL);
> - igt_require_gem(fd);
> - }
> + i915 = drm_open_driver(DRIVER_INTEL);
> + igt_require_gem(i915);
>
> - igt_subtest("basic") {
> - run_test (fd, 10);
> + igt_fork_hang_detector(i915);
> }
>
> + igt_subtest("basic")
> + run_test(i915, 1);
>
> igt_subtest("full") {
> - aper_size = gem_mappable_aperture_size();
> - count = aper_size / 4096 * 2;
> + uint64_t aper_size = gem_mappable_aperture_size();
> + unsigned long count = aper_size / 4096 + 1;
> +
> + intel_require_memory(count, 4096, CHECK_RAM);
> +
> + run_test(i915, count);
> + }
> +
> + igt_subtest("forked") {
> + uint64_t aper_size = gem_mappable_aperture_size();
> + unsigned long count = aper_size / 4096 + 1;
> + int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
>
> - /* count + 2 (noop & bad) buffers. A gem object appears to
> - require about 2kb + buffer + kernel overhead */
> - intel_require_memory(2+count, 2048+4096, CHECK_RAM);
> + intel_require_memory(count, 4096, CHECK_RAM);
>
> - run_test (fd, count);
> + igt_fork(child, ncpus)
> + run_test(i915, count / ncpus + 1);
> + igt_waitchildren();
> }
>
> igt_fixture {
> - close(fd);
> + igt_stop_hang_detector();
> }
> }
> --
> 2.20.1
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
WARNING: multiple messages have this Message-ID (diff)
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Cc: igt-dev@lists.freedesktop.org, matthew.auld@intel.com
Subject: Re: [PATCH i-g-t 2/3] i915/gem_cpu_reloc: Use a self-modifying chained batch
Date: Wed, 16 Jan 2019 16:22:59 +0200 [thread overview]
Message-ID: <87fttsy94s.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <20190116093509.30195-2-chris@chris-wilson.co.uk>
Chris Wilson <chris@chris-wilson.co.uk> writes:
> Use another sensitive CPU reloc to emit a chained batch from inside the
> updated buffer to reduce the workload on slow machines to fit within the
> CI timeout.
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=108248
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> tests/i915/gem_cpu_reloc.c | 347 ++++++++++++++++++++-----------------
> 1 file changed, 189 insertions(+), 158 deletions(-)
>
> diff --git a/tests/i915/gem_cpu_reloc.c b/tests/i915/gem_cpu_reloc.c
> index 882c312d4..33c4e4e0f 100644
> --- a/tests/i915/gem_cpu_reloc.c
> +++ b/tests/i915/gem_cpu_reloc.c
> @@ -59,214 +59,245 @@
>
> #include "intel_bufmgr.h"
>
> -IGT_TEST_DESCRIPTION("Test the relocations through the CPU domain.");
> +#define MI_INSTR(opcode, flags) ((opcode) << 23 | (flags))
>
> -static uint32_t use_blt;
> +IGT_TEST_DESCRIPTION("Test the relocations through the CPU domain.");
>
> -static void copy(int fd, uint32_t batch, uint32_t src, uint32_t dst)
> +static uint32_t *
> +gen2_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
> {
> - struct drm_i915_gem_execbuffer2 execbuf;
> - struct drm_i915_gem_relocation_entry gem_reloc[2];
> - struct drm_i915_gem_exec_object2 gem_exec[3];
> -
> - gem_reloc[0].offset = 4 * sizeof(uint32_t);
> - gem_reloc[0].delta = 0;
> - gem_reloc[0].target_handle = dst;
> - gem_reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
> - gem_reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
> - gem_reloc[0].presumed_offset = -1;
> -
> - gem_reloc[1].offset = 7 * sizeof(uint32_t);
> - if (intel_gen(intel_get_drm_devid(fd)) >= 8)
> - gem_reloc[1].offset += sizeof(uint32_t);
> - gem_reloc[1].delta = 0;
> - gem_reloc[1].target_handle = src;
> - gem_reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
> - gem_reloc[1].write_domain = 0;
> - gem_reloc[1].presumed_offset = -1;
> -
> - memset(gem_exec, 0, sizeof(gem_exec));
> - gem_exec[0].handle = src;
> - gem_exec[1].handle = dst;
> - gem_exec[2].handle = batch;
> - gem_exec[2].relocation_count = 2;
> - gem_exec[2].relocs_ptr = to_user_pointer(gem_reloc);
> -
> - memset(&execbuf, 0, sizeof(execbuf));
> - execbuf.buffers_ptr = to_user_pointer(gem_exec);
> - execbuf.buffer_count = 3;
> - execbuf.batch_len = 4096;
> - execbuf.flags = use_blt;
> -
> - gem_execbuf(fd, &execbuf);
> + *cs++ = MI_STORE_DWORD_IMM - 1;
> + addr->offset += sizeof(*cs);
> + cs += 1; /* addr */
> + cs += 1; /* value: implicit 0xffffffff */
> + return cs;
> +}
> +static uint32_t *
> +gen4_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
> +{
> + *cs++ = MI_STORE_DWORD_IMM;
> + *cs++ = 0;
> + addr->offset += 2 * sizeof(*cs);
> + cs += 1; /* addr */
> + cs += 1; /* value: implicit 0xffffffff */
> + return cs;
> +}
> +static uint32_t *
> +gen8_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
> +{
> + *cs++ = (MI_STORE_DWORD_IMM | 1 << 21) + 1;
> + addr->offset += sizeof(*cs);
> + igt_assert((addr->delta & 7) == 0);
> + cs += 2; /* addr */
> + cs += 2; /* value: implicit 0xffffffffffffffff */
> + return cs;
> }
>
> -static void exec(int fd, uint32_t handle)
> +static uint32_t *
> +gen2_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
> {
> - struct drm_i915_gem_execbuffer2 execbuf;
> - struct drm_i915_gem_exec_object2 gem_exec;
> + *cs++ = MI_BATCH_BUFFER_START | 2 << 6;
> + reloc->offset += sizeof(*cs);
> + reloc->delta += 1;
> + cs += 1; /* addr */
> + return cs;
> +}
> +static uint32_t *
> +gen4_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
> +{
> + *cs++ = MI_BATCH_BUFFER_START | 2 << 6 | 1 << 8;
> + reloc->offset += sizeof(*cs);
> + cs += 1; /* addr */
> + return cs;
> +}
> +static uint32_t *
> +gen6_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
> +{
> + *cs++ = MI_BATCH_BUFFER_START | 1 << 8;
> + reloc->offset += sizeof(*cs);
> + cs += 1; /* addr */
> + return cs;
> +}
> +static uint32_t *
> +hsw_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
> +{
> + *cs++ = MI_BATCH_BUFFER_START | 2 << 6 | 1 << 8 | 1 << 13;
> + reloc->offset += sizeof(*cs);
> + cs += 1; /* addr */
> + return cs;
> +}
> +static uint32_t *
> +gen8_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
> +{
> + if (((uintptr_t)cs & 7) == 0) {
> + *cs++ = MI_NOOP; /* align addr for MI_STORE_DWORD_IMM */
> + reloc->offset += sizeof(*cs);
> + }
Is the intent to align it so that, after the bb start is emitted, the
address ends up with the right alignment? Otherwise it looks like the
condition should have a '!' in it.
-Mika
>
> - memset(&gem_exec, 0, sizeof(gem_exec));
> - gem_exec.handle = handle;
> + *cs++ = MI_BATCH_BUFFER_START + 1;
> + reloc->offset += sizeof(*cs);
> + cs += 2; /* addr */
>
> - memset(&execbuf, 0, sizeof(execbuf));
> - execbuf.buffers_ptr = to_user_pointer(&gem_exec);
> - execbuf.buffer_count = 1;
> - execbuf.batch_len = 4096;
> + return cs;
> +}
>
> - gem_execbuf(fd, &execbuf);
> +static void *
> +create_tmpl(int i915, struct drm_i915_gem_relocation_entry *reloc)
> +{
> + const uint32_t devid = intel_get_drm_devid(i915);
> + const int gen = intel_gen(devid);
> + uint32_t *(*emit_store_addr)(uint32_t *cs,
> + struct drm_i915_gem_relocation_entry *addr);
> + uint32_t *(*emit_bb_start)(uint32_t *cs,
> + struct drm_i915_gem_relocation_entry *reloc);
> + void *tmpl;
> +
> + /* could use BLT_FILL instead for gen2 */
> + igt_require(gem_can_store_dword(i915, 0));
> +
> + if (gen >= 8)
> + emit_store_addr = gen8_emit_store_addr;
> + else if (gen >= 4)
> + emit_store_addr = gen4_emit_store_addr;
> + else
> + emit_store_addr = gen2_emit_store_addr;
> +
> + if (gen >= 8)
> + emit_bb_start = gen8_emit_bb_start;
> + else if (IS_HASWELL(devid))
> + emit_bb_start = hsw_emit_bb_start;
> + else if (gen >= 6)
> + emit_bb_start = gen6_emit_bb_start;
> + else if (gen >= 4)
> + emit_bb_start = gen4_emit_bb_start;
> + else
> + emit_bb_start = gen2_emit_bb_start;
> +
> + tmpl = malloc(4096);
> + igt_assert(tmpl);
> + memset(tmpl, 0xff, 4096);
> +
> + /* Jump over the booby traps to the end */
> + reloc[0].delta = 64;
> + emit_bb_start(tmpl, &reloc[0]);
> +
> + /* Restore the bad address to catch missing relocs */
> + reloc[1].offset = 64;
> + reloc[1].delta = reloc[0].offset;
> + *emit_store_addr(tmpl + 64, &reloc[1]) = MI_BATCH_BUFFER_END;
> +
> + return tmpl;
> }
>
> -uint32_t gen6_batch[] = {
> - (XY_SRC_COPY_BLT_CMD | 6 |
> - XY_SRC_COPY_BLT_WRITE_ALPHA |
> - XY_SRC_COPY_BLT_WRITE_RGB),
> - (3 << 24 | /* 32 bits */
> - 0xcc << 16 | /* copy ROP */
> - 4096),
> - 0 << 16 | 0, /* dst x1, y1 */
> - 1 << 16 | 2,
> - 0, /* dst relocation */
> - 0 << 16 | 0, /* src x1, y1 */
> - 4096,
> - 0, /* src relocation */
> - MI_BATCH_BUFFER_END,
> -};
> -
> -uint32_t gen8_batch[] = {
> - (XY_SRC_COPY_BLT_CMD | 8 |
> - XY_SRC_COPY_BLT_WRITE_ALPHA |
> - XY_SRC_COPY_BLT_WRITE_RGB),
> - (3 << 24 | /* 32 bits */
> - 0xcc << 16 | /* copy ROP */
> - 4096),
> - 0 << 16 | 0, /* dst x1, y1 */
> - 1 << 16 | 2,
> - 0, /* dst relocation */
> - 0, /* FIXME */
> - 0 << 16 | 0, /* src x1, y1 */
> - 4096,
> - 0, /* src relocation */
> - 0, /* FIXME */
> - MI_BATCH_BUFFER_END,
> -};
> -
> -uint32_t *batch = gen6_batch;
> -uint32_t batch_size = sizeof(gen6_batch);
> -
> -static void run_test(int fd, int count)
> +static void run_test(int i915, int count)
> {
> - const uint32_t hang[] = {-1, -1, -1, -1};
> - const uint32_t end[] = {MI_BATCH_BUFFER_END, 0};
> - uint32_t noop;
> - uint32_t *handles;
> - int i;
> + struct drm_i915_gem_execbuffer2 execbuf;
> + struct drm_i915_gem_relocation_entry reloc[2];
> + struct drm_i915_gem_exec_object2 obj;
>
> - noop = intel_get_drm_devid(fd);
> + uint32_t *handles;
> + uint32_t *tmpl;
>
> - use_blt = 0;
> - if (intel_gen(noop) >= 6)
> - use_blt = I915_EXEC_BLT;
> + handles = malloc(count * sizeof(uint32_t));
> + igt_assert(handles);
>
> - if (intel_gen(noop) >= 8) {
> - batch = gen8_batch;
> - batch_size += 2 * 4;
> + memset(reloc, 0, sizeof(reloc));
> + tmpl = create_tmpl(i915, reloc);
> + for (int i = 0; i < count; i++) {
> + handles[i] = gem_create(i915, 4096);
> + gem_write(i915, handles[i], 0, tmpl, 4096);
> }
> + free(tmpl);
>
> - handles = malloc (count * sizeof(uint32_t));
> - igt_assert(handles);
> + memset(&obj, 0, sizeof(obj));
> + obj.relocs_ptr = to_user_pointer(reloc);
> + obj.relocation_count = ARRAY_SIZE(reloc);
>
> - noop = gem_create(fd, 4096);
> - gem_write(fd, noop, 0, end, sizeof(end));
> + memset(&execbuf, 0, sizeof(execbuf));
> + execbuf.buffers_ptr = to_user_pointer(&obj);
> + execbuf.buffer_count = 1;
>
> /* fill the entire gart with batches and run them */
> - for (i = 0; i < count; i++) {
> - uint32_t bad;
> -
> - handles[i] = gem_create(fd, 4096);
> - gem_write(fd, handles[i], 0, batch, batch_size);
> -
> - bad = gem_create(fd, 4096);
> - gem_write(fd, bad, 0, hang, sizeof(hang));
> - gem_write(fd, bad, 4096-sizeof(end), end, sizeof(end));
> + for (int i = 0; i < count; i++) {
> + obj.handle = handles[i];
>
> - /* launch the newly created batch */
> - copy(fd, handles[i], noop, bad);
> - exec(fd, bad);
> - gem_close(fd, bad);
> + reloc[0].target_handle = obj.handle;
> + reloc[0].presumed_offset = -1;
> + reloc[1].target_handle = obj.handle;
> + reloc[1].presumed_offset = -1;
>
> - igt_progress("gem_cpu_reloc: ", i, 2*count);
> + gem_execbuf(i915, &execbuf);
> }
>
> /* And again in reverse to try and catch the relocation code out */
> - for (i = 0; i < count; i++) {
> - uint32_t bad;
> + for (int i = 0; i < count; i++) {
> + obj.handle = handles[count - i - 1];
>
> - bad = gem_create(fd, 4096);
> - gem_write(fd, bad, 0, hang, sizeof(hang));
> - gem_write(fd, bad, 4096-sizeof(end), end, sizeof(end));
> + reloc[0].target_handle = obj.handle;
> + reloc[0].presumed_offset = -1;
> + reloc[1].target_handle = obj.handle;
> + reloc[1].presumed_offset = -1;
>
> - /* launch the newly created batch */
> - copy(fd, handles[count-i-1], noop, bad);
> - exec(fd, bad);
> - gem_close(fd, bad);
> -
> - igt_progress("gem_cpu_reloc: ", count+i, 3*count);
> + gem_execbuf(i915, &execbuf);
> }
>
> - /* Third time lucky? */
> - for (i = 0; i < count; i++) {
> - uint32_t bad;
> + /* Third time unlucky? */
> + for (int i = 0; i < count; i++) {
> + obj.handle = handles[i];
>
> - bad = gem_create(fd, 4096);
> - gem_write(fd, bad, 0, hang, sizeof(hang));
> - gem_write(fd, bad, 4096-sizeof(end), end, sizeof(end));
> + reloc[0].target_handle = obj.handle;
> + reloc[0].presumed_offset = -1;
> + reloc[1].target_handle = obj.handle;
> + reloc[1].presumed_offset = -1;
>
> - /* launch the newly created batch */
> - gem_set_domain(fd, handles[i],
> - I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
> - copy(fd, handles[i], noop, bad);
> - exec(fd, bad);
> - gem_close(fd, bad);
> + gem_set_domain(i915, obj.handle,
> + I915_GEM_DOMAIN_CPU,
> + I915_GEM_DOMAIN_CPU);
>
> - igt_progress("gem_cpu_reloc: ", 2*count+i, 3*count);
> + gem_execbuf(i915, &execbuf);
> }
>
> - igt_info("Subtest suceeded, cleanup up - this might take a while.\n");
> - for (i = 0; i < count; i++) {
> - gem_close(fd, handles[i]);
> - }
> - gem_close(fd, noop);
> + for (int i = 0; i < count; i++)
> + gem_close(i915, handles[i]);
> free(handles);
> }
>
> igt_main
> {
> - uint64_t aper_size;
> - int fd, count;
> + int i915;
>
> igt_fixture {
> - fd = drm_open_driver(DRIVER_INTEL);
> - igt_require_gem(fd);
> - }
> + i915 = drm_open_driver(DRIVER_INTEL);
> + igt_require_gem(i915);
>
> - igt_subtest("basic") {
> - run_test (fd, 10);
> + igt_fork_hang_detector(i915);
> }
>
> + igt_subtest("basic")
> + run_test(i915, 1);
>
> igt_subtest("full") {
> - aper_size = gem_mappable_aperture_size();
> - count = aper_size / 4096 * 2;
> + uint64_t aper_size = gem_mappable_aperture_size();
> + unsigned long count = aper_size / 4096 + 1;
> +
> + intel_require_memory(count, 4096, CHECK_RAM);
> +
> + run_test(i915, count);
> + }
> +
> + igt_subtest("forked") {
> + uint64_t aper_size = gem_mappable_aperture_size();
> + unsigned long count = aper_size / 4096 + 1;
> + int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
>
> - /* count + 2 (noop & bad) buffers. A gem object appears to
> - require about 2kb + buffer + kernel overhead */
> - intel_require_memory(2+count, 2048+4096, CHECK_RAM);
> + intel_require_memory(count, 4096, CHECK_RAM);
>
> - run_test (fd, count);
> + igt_fork(child, ncpus)
> + run_test(i915, count / ncpus + 1);
> + igt_waitchildren();
> }
>
> igt_fixture {
> - close(fd);
> + igt_stop_hang_detector();
> }
> }
> --
> 2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2019-01-16 14:22 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-01-16 9:35 [igt-dev] [PATCH i-g-t 1/3] i915/gem_userptr_blits: Only mlock the memfd once, not the arena Chris Wilson
2019-01-16 9:35 ` Chris Wilson
2019-01-16 9:35 ` [igt-dev] [PATCH i-g-t 2/3] i915/gem_cpu_reloc: Use a self-modifying chained batch Chris Wilson
2019-01-16 9:35 ` Chris Wilson
2019-01-16 14:22 ` Mika Kuoppala [this message]
2019-01-16 14:22 ` Mika Kuoppala
2019-01-16 14:30 ` [igt-dev] [Intel-gfx] " Chris Wilson
2019-01-16 14:30 ` Chris Wilson
2019-01-16 14:49 ` [igt-dev] [Intel-gfx] " Mika Kuoppala
2019-01-16 14:49 ` Mika Kuoppala
2019-01-16 9:35 ` [igt-dev] [PATCH i-g-t 3/3] igt/drv_missed_irq: Skip if the kernel reports no rings available to test Chris Wilson
2019-01-16 9:35 ` Chris Wilson
2019-01-16 14:30 ` [igt-dev] [Intel-gfx] " Mika Kuoppala
2019-01-16 14:30 ` Mika Kuoppala
2019-01-16 9:47 ` [igt-dev] [Intel-gfx] [PATCH i-g-t 1/3] i915/gem_userptr_blits: Only mlock the memfd once, not the arena Mika Kuoppala
2019-01-16 9:47 ` Mika Kuoppala
2019-01-16 9:58 ` [Intel-gfx] " Chris Wilson
2019-01-16 9:58 ` Chris Wilson
2019-01-16 10:35 ` [igt-dev] [Intel-gfx] " Mika Kuoppala
2019-01-16 10:35 ` Mika Kuoppala
2019-01-16 10:46 ` [igt-dev] [Intel-gfx] " Chris Wilson
2019-01-16 10:46 ` Chris Wilson
2019-01-16 10:27 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/3] " Patchwork
2019-01-16 11:58 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87fttsy94s.fsf@gaia.fi.intel.com \
--to=mika.kuoppala@linux.intel.com \
--cc=chris@chris-wilson.co.uk \
--cc=igt-dev@lists.freedesktop.org \
--cc=intel-gfx@lists.freedesktop.org \
--cc=matthew.auld@intel.com \
--cc=tvrtko.ursulin@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.