* [PATCH igt 1/2] igt/gem_exec_capture: Wait for batch to execute before triggering reset
@ 2017-08-07 12:36 Chris Wilson
2017-08-07 12:36 ` [PATCH igt 2/2] lib: Remove illegal instructions from hang injection Chris Wilson
2017-08-07 13:13 ` ✓ Fi.CI.BAT: success for series starting with [Intel-gfx,1/2] igt/gem_exec_capture: Wait for batch to execute before triggering reset Patchwork
0 siblings, 2 replies; 5+ messages in thread
From: Chris Wilson @ 2017-08-07 12:36 UTC (permalink / raw)
To: intel-gfx; +Cc: daniel.vetter
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
tests/gem_exec_capture.c | 65 ++++++++++++++++++++++++++++++++++++------------
1 file changed, 49 insertions(+), 16 deletions(-)
diff --git a/tests/gem_exec_capture.c b/tests/gem_exec_capture.c
index f8f43d29..a73ece5d 100644
--- a/tests/gem_exec_capture.c
+++ b/tests/gem_exec_capture.c
@@ -64,9 +64,9 @@ static void capture(int fd, int dir, unsigned ring)
#define CAPTURE 1
#define NOCAPTURE 2
#define BATCH 3
- struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_relocation_entry reloc[2];
struct drm_i915_gem_execbuffer2 execbuf;
- uint32_t *batch;
+ uint32_t *batch, *seqno;
int i;
memset(obj, 0, sizeof(obj));
@@ -76,25 +76,50 @@ static void capture(int fd, int dir, unsigned ring)
obj[NOCAPTURE].handle = gem_create(fd, 4096);
obj[BATCH].handle = gem_create(fd, 4096);
- obj[BATCH].relocs_ptr = (uintptr_t)&reloc;
- obj[BATCH].relocation_count = 1;
-
- memset(&reloc, 0, sizeof(reloc));
- reloc.target_handle = obj[BATCH].handle; /* recurse */
- reloc.presumed_offset = 0;
- reloc.offset = sizeof(uint32_t);
- reloc.delta = 0;
- reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
- reloc.write_domain = 0;
+ obj[BATCH].relocs_ptr = (uintptr_t)reloc;
+ obj[BATCH].relocation_count = ARRAY_SIZE(reloc);
+
+ memset(reloc, 0, sizeof(reloc));
+ reloc[0].target_handle = obj[BATCH].handle; /* recurse */
+ reloc[0].presumed_offset = 0;
+ reloc[0].offset = 5*sizeof(uint32_t);
+ reloc[0].delta = 0;
+ reloc[0].read_domains = I915_GEM_DOMAIN_COMMAND;
+ reloc[0].write_domain = 0;
+
+ reloc[1].target_handle = obj[SCRATCH].handle; /* breadcrumb */
+ reloc[1].presumed_offset = 0;
+ reloc[1].offset = sizeof(uint32_t);
+ reloc[1].delta = 0;
+ reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[1].write_domain = I915_GEM_DOMAIN_RENDER;
+
+ seqno = gem_mmap__wc(fd, obj[SCRATCH].handle, 0, 4096, PROT_READ);
+ gem_set_domain(fd, obj[SCRATCH].handle,
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
batch = gem_mmap__cpu(fd, obj[BATCH].handle, 0, 4096, PROT_WRITE);
gem_set_domain(fd, obj[BATCH].handle,
I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
i = 0;
- batch[i++] = 0xdeadbeef; /* crashme */
- batch[i++] = -1;
- batch[i] = MI_BATCH_BUFFER_START; /* not crashed? try again! */
+ batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+ if (gen >= 8) {
+ batch[++i] = 0;
+ batch[++i] = 0;
+ } else if (gen >= 4) {
+ batch[++i] = 0;
+ batch[++i] = 0;
+ reloc[1].offset += sizeof(uint32_t);
+ } else {
+ batch[i]--;
+ batch[++i] = 0;
+ }
+ batch[++i] = 0xc0ffee;
+ if (gen < 3)
+ batch[++i] = MI_NOOP;
+
+ batch[++i] = MI_BATCH_BUFFER_START; /* not crashed? try again! */
if (gen >= 8) {
batch[i] |= 1 << 8 | 1;
batch[++i] = 0;
@@ -107,7 +132,7 @@ static void capture(int fd, int dir, unsigned ring)
batch[++i] = 0;
if (gen < 4) {
batch[i] |= 1;
- reloc.delta = 1;
+ reloc[0].delta = 1;
}
}
munmap(batch, 4096);
@@ -118,10 +143,17 @@ static void capture(int fd, int dir, unsigned ring)
execbuf.flags = ring;
gem_execbuf(fd, &execbuf);
+ /* Wait for the request to start */
+ while (*(volatile uint32_t *)seqno != 0xc0ffee)
+ igt_assert(gem_bo_busy(fd, obj[SCRATCH].handle));
+ munmap(seqno, 4096);
+
/* Check that only the buffer we marked is reported in the error */
igt_force_gpu_reset(fd);
check_error_state(dir, &obj[CAPTURE]);
+ gem_sync(fd, obj[BATCH].handle);
+
gem_close(fd, obj[BATCH].handle);
gem_close(fd, obj[NOCAPTURE].handle);
gem_close(fd, obj[CAPTURE].handle);
@@ -167,6 +199,7 @@ igt_main
igt_subtest_f("capture-%s", e->name) {
gem_require_ring(fd, e->exec_id | e->flags);
+ igt_require(gem_can_store_dword(fd, e->exec_id | e->flags));
capture(fd, dir, e->exec_id | e->flags);
}
}
--
2.13.3
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH igt 2/2] lib: Remove illegal instructions from hang injection
2017-08-07 12:36 [PATCH igt 1/2] igt/gem_exec_capture: Wait for batch to execute before triggering reset Chris Wilson
@ 2017-08-07 12:36 ` Chris Wilson
2017-08-07 13:33 ` Mika Kuoppala
2017-08-07 13:13 ` ✓ Fi.CI.BAT: success for series starting with [Intel-gfx,1/2] igt/gem_exec_capture: Wait for batch to execute before triggering reset Patchwork
1 sibling, 1 reply; 5+ messages in thread
From: Chris Wilson @ 2017-08-07 12:36 UTC (permalink / raw)
To: intel-gfx; +Cc: daniel.vetter
The idea behind using an illegal instruction was to hang the GPU much
faster than simply using the recursive batch. However, we stopped doing
so on gen8+ as the CS parser was much laxer and allowed the illegal
command through but still interpreted the packet length (jumping over
the recursive batch buffer start that followed). Sandybridge doesn't
just hang the GPU when it encounters an illegal command on the BLT
engine, it hangs the machine. That goes above and beyond testing our
hangcheck + reset, so remove the deadly instructions.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
lib/igt_gt.c | 25 +++++--------------------
1 file changed, 5 insertions(+), 20 deletions(-)
diff --git a/lib/igt_gt.c b/lib/igt_gt.c
index 6f7daa5e..d5e8b557 100644
--- a/lib/igt_gt.c
+++ b/lib/igt_gt.c
@@ -270,30 +270,15 @@ igt_hang_t igt_hang_ctx(int fd,
memset(b, 0xc5, sizeof(b));
- /*
- * We emit invalid command to provoke a gpu hang.
- * If that doesn't work, we do bb start loop.
- * Note that the bb start aligment is illegal due this.
- * But hey, we are here to hang the gpu so whatever works.
- * We skip 0xfffffff on gen9 as it confuses hw in an such a way that
- * it will skip over the bb start, causing runaway head and
- * thus much slower hang detection.
- */
len = 2;
- if (intel_gen(intel_get_drm_devid(fd)) >= 8) {
- b[0] = MI_NOOP;
+ if (intel_gen(intel_get_drm_devid(fd)) >= 8)
len++;
- } else {
- b[0] = 0xffffffff;
- }
-
- b[1] = MI_BATCH_BUFFER_START | (len - 2);
- b[1+len] = MI_BATCH_BUFFER_END;
- b[2+len] = MI_NOOP;
+ b[0] = MI_BATCH_BUFFER_START | (len - 2);
+ b[len] = MI_BATCH_BUFFER_END;
+ b[len+1] = MI_NOOP;
gem_write(fd, exec.handle, 0, b, sizeof(b));
- reloc.offset = 8;
- reloc.delta = 4;
+ reloc.offset = sizeof(uint32_t);
reloc.target_handle = exec.handle;
reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
--
2.13.3
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH igt 2/2] lib: Remove illegal instructions from hang injection
2017-08-07 12:36 ` [PATCH igt 2/2] lib: Remove illegal instructions from hang injection Chris Wilson
@ 2017-08-07 13:33 ` Mika Kuoppala
2017-08-08 13:25 ` Daniel Vetter
0 siblings, 1 reply; 5+ messages in thread
From: Mika Kuoppala @ 2017-08-07 13:33 UTC (permalink / raw)
To: Chris Wilson, intel-gfx; +Cc: daniel.vetter
Chris Wilson <chris@chris-wilson.co.uk> writes:
> The idea behind using an illegal instruction was to hang the GPU much
> faster than simply using the recursive batch. However, we stopped doing
> so on gen8+ as the CS parser was much laxer and allowed the illegal
> command through but still interpreted the packet length (jumping over
> the recursive batch buffer start that followed). Sandybridge doesn't
> just hang the GPU when it encounters an illegal command on the BLT
> engine, it hangs the machine. That goes above and beyond testing our
> hangcheck + reset, so remove the deadly instructions.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> lib/igt_gt.c | 25 +++++--------------------
> 1 file changed, 5 insertions(+), 20 deletions(-)
>
> diff --git a/lib/igt_gt.c b/lib/igt_gt.c
> index 6f7daa5e..d5e8b557 100644
> --- a/lib/igt_gt.c
> +++ b/lib/igt_gt.c
> @@ -270,30 +270,15 @@ igt_hang_t igt_hang_ctx(int fd,
>
> memset(b, 0xc5, sizeof(b));
>
> - /*
> - * We emit invalid command to provoke a gpu hang.
> - * If that doesn't work, we do bb start loop.
> - * Note that the bb start aligment is illegal due this.
> - * But hey, we are here to hang the gpu so whatever works.
> - * We skip 0xfffffff on gen9 as it confuses hw in an such a way that
> - * it will skip over the bb start, causing runaway head and
> - * thus much slower hang detection.
> - */
Daydreaming about MI_HALT,
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
> len = 2;
> - if (intel_gen(intel_get_drm_devid(fd)) >= 8) {
> - b[0] = MI_NOOP;
> + if (intel_gen(intel_get_drm_devid(fd)) >= 8)
> len++;
> - } else {
> - b[0] = 0xffffffff;
> - }
> -
> - b[1] = MI_BATCH_BUFFER_START | (len - 2);
> - b[1+len] = MI_BATCH_BUFFER_END;
> - b[2+len] = MI_NOOP;
> + b[0] = MI_BATCH_BUFFER_START | (len - 2);
> + b[len] = MI_BATCH_BUFFER_END;
> + b[len+1] = MI_NOOP;
> gem_write(fd, exec.handle, 0, b, sizeof(b));
>
> - reloc.offset = 8;
> - reloc.delta = 4;
> + reloc.offset = sizeof(uint32_t);
> reloc.target_handle = exec.handle;
> reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
>
> --
> 2.13.3
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH igt 2/2] lib: Remove illegal instructions from hang injection
2017-08-07 13:33 ` Mika Kuoppala
@ 2017-08-08 13:25 ` Daniel Vetter
0 siblings, 0 replies; 5+ messages in thread
From: Daniel Vetter @ 2017-08-08 13:25 UTC (permalink / raw)
To: Mika Kuoppala; +Cc: daniel.vetter, intel-gfx
On Mon, Aug 07, 2017 at 04:33:40PM +0300, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
>
> > The idea behind using an illegal instruction was to hang the GPU much
> > faster than simply using the recursive batch. However, we stopped doing
> > so on gen8+ as the CS parser was much laxer and allowed the illegal
> > command through but still interpreted the packet length (jumping over
> > the recursive batch buffer start that followed). Sandybridge doesn't
> > just hang the GPU when it encounters an illegal command on the BLT
> > engine, it hangs the machine. That goes above and beyond testing our
> > hangcheck + reset, so remove the deadly instructions.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> > lib/igt_gt.c | 25 +++++--------------------
> > 1 file changed, 5 insertions(+), 20 deletions(-)
> >
> > diff --git a/lib/igt_gt.c b/lib/igt_gt.c
> > index 6f7daa5e..d5e8b557 100644
> > --- a/lib/igt_gt.c
> > +++ b/lib/igt_gt.c
> > @@ -270,30 +270,15 @@ igt_hang_t igt_hang_ctx(int fd,
> >
> > memset(b, 0xc5, sizeof(b));
> >
> > - /*
> > - * We emit invalid command to provoke a gpu hang.
> > - * If that doesn't work, we do bb start loop.
> > - * Note that the bb start aligment is illegal due this.
> > - * But hey, we are here to hang the gpu so whatever works.
> > - * We skip 0xfffffff on gen9 as it confuses hw in an such a way that
> > - * it will skip over the bb start, causing runaway head and
> > - * thus much slower hang detection.
> > - */
>
> Daydreaming about MI_HALT,
>
> Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
I tested both on my snb for a few hours, works solid. I guess I botched
the job when I tried this conversion, resulting in a gpu that couldn't
reset somehow.
Both patches pushed to igt, thanks a lot.
-Daniel
>
> > len = 2;
> > - if (intel_gen(intel_get_drm_devid(fd)) >= 8) {
> > - b[0] = MI_NOOP;
> > + if (intel_gen(intel_get_drm_devid(fd)) >= 8)
> > len++;
> > - } else {
> > - b[0] = 0xffffffff;
> > - }
> > -
> > - b[1] = MI_BATCH_BUFFER_START | (len - 2);
> > - b[1+len] = MI_BATCH_BUFFER_END;
> > - b[2+len] = MI_NOOP;
> > + b[0] = MI_BATCH_BUFFER_START | (len - 2);
> > + b[len] = MI_BATCH_BUFFER_END;
> > + b[len+1] = MI_NOOP;
> > gem_write(fd, exec.handle, 0, b, sizeof(b));
> >
> > - reloc.offset = 8;
> > - reloc.delta = 4;
> > + reloc.offset = sizeof(uint32_t);
> > reloc.target_handle = exec.handle;
> > reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
> >
> > --
> > 2.13.3
> >
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
* ✓ Fi.CI.BAT: success for series starting with [Intel-gfx,1/2] igt/gem_exec_capture: Wait for batch to execute before triggering reset
2017-08-07 12:36 [PATCH igt 1/2] igt/gem_exec_capture: Wait for batch to execute before triggering reset Chris Wilson
2017-08-07 12:36 ` [PATCH igt 2/2] lib: Remove illegal instructions from hang injection Chris Wilson
@ 2017-08-07 13:13 ` Patchwork
1 sibling, 0 replies; 5+ messages in thread
From: Patchwork @ 2017-08-07 13:13 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [Intel-gfx,1/2] igt/gem_exec_capture: Wait for batch to execute before triggering reset
URL : https://patchwork.freedesktop.org/series/28452/
State : success
== Summary ==
IGT patchset tested on top of latest successful build
79d6f77fa1ff33f198d954a3c7f1028322fcce52 tests/perf: follow up build fix
with latest DRM-Tip kernel build CI_DRM_2929
96c5eac5f202 drm-tip: 2017y-08m-07d-10h-55m-52s UTC integration manifest
Test gem_exec_parallel:
Subgroup basic:
fail -> PASS (fi-ilk-650) fdo#101735
Test gem_ringfill:
Subgroup basic-default:
skip -> PASS (fi-bsw-n3050) fdo#101915
fdo#101735 https://bugs.freedesktop.org/show_bug.cgi?id=101735
fdo#101915 https://bugs.freedesktop.org/show_bug.cgi?id=101915
fi-bdw-5557u total:279 pass:268 dwarn:0 dfail:0 fail:0 skip:11 time:438s
fi-bdw-gvtdvm total:279 pass:265 dwarn:0 dfail:0 fail:0 skip:14 time:423s
fi-blb-e6850 total:279 pass:224 dwarn:1 dfail:0 fail:0 skip:54 time:361s
fi-bsw-n3050 total:279 pass:243 dwarn:0 dfail:0 fail:0 skip:36 time:487s
fi-bxt-j4205 total:279 pass:260 dwarn:0 dfail:0 fail:0 skip:19 time:491s
fi-byt-j1900 total:279 pass:254 dwarn:1 dfail:0 fail:0 skip:24 time:526s
fi-byt-n2820 total:279 pass:250 dwarn:1 dfail:0 fail:0 skip:28 time:511s
fi-glk-2a total:279 pass:260 dwarn:0 dfail:0 fail:0 skip:19 time:583s
fi-hsw-4770 total:279 pass:263 dwarn:0 dfail:0 fail:0 skip:16 time:424s
fi-hsw-4770r total:279 pass:263 dwarn:0 dfail:0 fail:0 skip:16 time:404s
fi-ilk-650 total:279 pass:229 dwarn:0 dfail:0 fail:0 skip:50 time:421s
fi-ivb-3520m total:279 pass:261 dwarn:0 dfail:0 fail:0 skip:18 time:506s
fi-ivb-3770 total:279 pass:261 dwarn:0 dfail:0 fail:0 skip:18 time:482s
fi-kbl-7500u total:279 pass:261 dwarn:0 dfail:0 fail:0 skip:18 time:463s
fi-kbl-7560u total:279 pass:269 dwarn:0 dfail:0 fail:0 skip:10 time:567s
fi-kbl-r total:279 pass:261 dwarn:0 dfail:0 fail:0 skip:18 time:579s
fi-pnv-d510 total:279 pass:223 dwarn:1 dfail:0 fail:0 skip:55 time:571s
fi-skl-6260u total:279 pass:269 dwarn:0 dfail:0 fail:0 skip:10 time:446s
fi-skl-6700k total:279 pass:261 dwarn:0 dfail:0 fail:0 skip:18 time:642s
fi-skl-6770hq total:279 pass:269 dwarn:0 dfail:0 fail:0 skip:10 time:470s
fi-skl-gvtdvm total:279 pass:266 dwarn:0 dfail:0 fail:0 skip:13 time:424s
fi-skl-x1585l total:279 pass:268 dwarn:0 dfail:0 fail:0 skip:11 time:472s
fi-snb-2520m total:279 pass:251 dwarn:0 dfail:0 fail:0 skip:28 time:547s
fi-snb-2600 total:279 pass:250 dwarn:0 dfail:0 fail:0 skip:29 time:417s
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_27/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2017-08-08 13:25 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-07 12:36 [PATCH igt 1/2] igt/gem_exec_capture: Wait for batch to execute before triggering reset Chris Wilson
2017-08-07 12:36 ` [PATCH igt 2/2] lib: Remove illegal instructions from hang injection Chris Wilson
2017-08-07 13:33 ` Mika Kuoppala
2017-08-08 13:25 ` Daniel Vetter
2017-08-07 13:13 ` ✓ Fi.CI.BAT: success for series starting with [Intel-gfx,1/2] igt/gem_exec_capture: Wait for batch to execute before triggering reset Patchwork
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox