public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
* [PATCH igt 1/2] igt/gem_exec_capture: Wait for batch to execute before triggering reset
@ 2017-08-07 12:36 Chris Wilson
  2017-08-07 12:36 ` [PATCH igt 2/2] lib: Remove illegal instructions from hang injection Chris Wilson
  2017-08-07 13:13 ` ✓ Fi.CI.BAT: success for series starting with [Intel-gfx,1/2] igt/gem_exec_capture: Wait for batch to execute before triggering reset Patchwork
  0 siblings, 2 replies; 5+ messages in thread
From: Chris Wilson @ 2017-08-07 12:36 UTC (permalink / raw)
  To: intel-gfx; +Cc: daniel.vetter

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/gem_exec_capture.c | 65 ++++++++++++++++++++++++++++++++++++------------
 1 file changed, 49 insertions(+), 16 deletions(-)

diff --git a/tests/gem_exec_capture.c b/tests/gem_exec_capture.c
index f8f43d29..a73ece5d 100644
--- a/tests/gem_exec_capture.c
+++ b/tests/gem_exec_capture.c
@@ -64,9 +64,9 @@ static void capture(int fd, int dir, unsigned ring)
 #define CAPTURE 1
 #define NOCAPTURE 2
 #define BATCH 3
-	struct drm_i915_gem_relocation_entry reloc;
+	struct drm_i915_gem_relocation_entry reloc[2];
 	struct drm_i915_gem_execbuffer2 execbuf;
-	uint32_t *batch;
+	uint32_t *batch, *seqno;
 	int i;
 
 	memset(obj, 0, sizeof(obj));
@@ -76,25 +76,50 @@ static void capture(int fd, int dir, unsigned ring)
 	obj[NOCAPTURE].handle = gem_create(fd, 4096);
 
 	obj[BATCH].handle = gem_create(fd, 4096);
-	obj[BATCH].relocs_ptr = (uintptr_t)&reloc;
-	obj[BATCH].relocation_count = 1;
-
-	memset(&reloc, 0, sizeof(reloc));
-	reloc.target_handle = obj[BATCH].handle; /* recurse */
-	reloc.presumed_offset = 0;
-	reloc.offset = sizeof(uint32_t);
-	reloc.delta = 0;
-	reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
-	reloc.write_domain = 0;
+	obj[BATCH].relocs_ptr = (uintptr_t)reloc;
+	obj[BATCH].relocation_count = ARRAY_SIZE(reloc);
+
+	memset(reloc, 0, sizeof(reloc));
+	reloc[0].target_handle = obj[BATCH].handle; /* recurse */
+	reloc[0].presumed_offset = 0;
+	reloc[0].offset = 5*sizeof(uint32_t);
+	reloc[0].delta = 0;
+	reloc[0].read_domains = I915_GEM_DOMAIN_COMMAND;
+	reloc[0].write_domain = 0;
+
+	reloc[1].target_handle = obj[SCRATCH].handle; /* breadcrumb */
+	reloc[1].presumed_offset = 0;
+	reloc[1].offset = sizeof(uint32_t);
+	reloc[1].delta = 0;
+	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+	reloc[1].write_domain = I915_GEM_DOMAIN_RENDER;
+
+	seqno = gem_mmap__wc(fd, obj[SCRATCH].handle, 0, 4096, PROT_READ);
+	gem_set_domain(fd, obj[SCRATCH].handle,
+			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
 
 	batch = gem_mmap__cpu(fd, obj[BATCH].handle, 0, 4096, PROT_WRITE);
 	gem_set_domain(fd, obj[BATCH].handle,
 			I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
 
 	i = 0;
-	batch[i++] = 0xdeadbeef; /* crashme */
-	batch[i++] = -1;
-	batch[i] = MI_BATCH_BUFFER_START; /* not crashed? try again! */
+	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	if (gen >= 8) {
+		batch[++i] = 0;
+		batch[++i] = 0;
+	} else if (gen >= 4) {
+		batch[++i] = 0;
+		batch[++i] = 0;
+		reloc[1].offset += sizeof(uint32_t);
+	} else {
+		batch[i]--;
+		batch[++i] = 0;
+	}
+	batch[++i] = 0xc0ffee;
+	if (gen < 3)
+		batch[++i] = MI_NOOP;
+
+	batch[++i] = MI_BATCH_BUFFER_START; /* not crashed? try again! */
 	if (gen >= 8) {
 		batch[i] |= 1 << 8 | 1;
 		batch[++i] = 0;
@@ -107,7 +132,7 @@ static void capture(int fd, int dir, unsigned ring)
 		batch[++i] = 0;
 		if (gen < 4) {
 			batch[i] |= 1;
-			reloc.delta = 1;
+			reloc[0].delta = 1;
 		}
 	}
 	munmap(batch, 4096);
@@ -118,10 +143,17 @@ static void capture(int fd, int dir, unsigned ring)
 	execbuf.flags = ring;
 	gem_execbuf(fd, &execbuf);
 
+	/* Wait for the request to start */
+	while (*(volatile uint32_t *)seqno != 0xc0ffee)
+		igt_assert(gem_bo_busy(fd, obj[SCRATCH].handle));
+	munmap(seqno, 4096);
+
 	/* Check that only the buffer we marked is reported in the error */
 	igt_force_gpu_reset(fd);
 	check_error_state(dir, &obj[CAPTURE]);
 
+	gem_sync(fd, obj[BATCH].handle);
+
 	gem_close(fd, obj[BATCH].handle);
 	gem_close(fd, obj[NOCAPTURE].handle);
 	gem_close(fd, obj[CAPTURE].handle);
@@ -167,6 +199,7 @@ igt_main
 
 		igt_subtest_f("capture-%s", e->name) {
 			gem_require_ring(fd, e->exec_id | e->flags);
+			igt_require(gem_can_store_dword(fd, e->exec_id | e->flags));
 			capture(fd, dir, e->exec_id | e->flags);
 		}
 	}
-- 
2.13.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH igt 2/2] lib: Remove illegal instructions from hang injection
  2017-08-07 12:36 [PATCH igt 1/2] igt/gem_exec_capture: Wait for batch to execute before triggering reset Chris Wilson
@ 2017-08-07 12:36 ` Chris Wilson
  2017-08-07 13:33   ` Mika Kuoppala
  2017-08-07 13:13 ` ✓ Fi.CI.BAT: success for series starting with [Intel-gfx,1/2] igt/gem_exec_capture: Wait for batch to execute before triggering reset Patchwork
  1 sibling, 1 reply; 5+ messages in thread
From: Chris Wilson @ 2017-08-07 12:36 UTC (permalink / raw)
  To: intel-gfx; +Cc: daniel.vetter

The idea behind using an illegal instruction was to hang the GPU much
faster than simply using the recursive batch. However, we stopped doing
so on gen8+ as the CS parser was much laxer and allowed the illegal
command through but still interpreted the packet length (jumping over
the recursive batch buffer start that followed). Sandybridge doesn't
just hang the GPU when it encounters an illegal command on the BLT
engine, it hangs the machine. That goes above and beyond testing our
hangcheck + reset, so remove the deadly instructions.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 lib/igt_gt.c | 25 +++++--------------------
 1 file changed, 5 insertions(+), 20 deletions(-)

diff --git a/lib/igt_gt.c b/lib/igt_gt.c
index 6f7daa5e..d5e8b557 100644
--- a/lib/igt_gt.c
+++ b/lib/igt_gt.c
@@ -270,30 +270,15 @@ igt_hang_t igt_hang_ctx(int fd,
 
 	memset(b, 0xc5, sizeof(b));
 
-	/*
-	 * We emit invalid command to provoke a gpu hang.
-	 * If that doesn't work, we do bb start loop.
-	 * Note that the bb start aligment is illegal due this.
-	 * But hey, we are here to hang the gpu so whatever works.
-	 * We skip 0xfffffff on gen9 as it confuses hw in an such a way that
-	 * it will skip over the bb start, causing runaway head and
-	 * thus much slower hang detection.
-	 */
 	len = 2;
-	if (intel_gen(intel_get_drm_devid(fd)) >= 8) {
-		b[0] = MI_NOOP;
+	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
 		len++;
-	} else {
-		b[0] = 0xffffffff;
-	}
-
-	b[1] = MI_BATCH_BUFFER_START | (len - 2);
-	b[1+len] = MI_BATCH_BUFFER_END;
-	b[2+len] = MI_NOOP;
+	b[0] = MI_BATCH_BUFFER_START | (len - 2);
+	b[len] = MI_BATCH_BUFFER_END;
+	b[len+1] = MI_NOOP;
 	gem_write(fd, exec.handle, 0, b, sizeof(b));
 
-	reloc.offset = 8;
-	reloc.delta = 4;
+	reloc.offset = sizeof(uint32_t);
 	reloc.target_handle = exec.handle;
 	reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
 
-- 
2.13.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [Intel-gfx,1/2] igt/gem_exec_capture: Wait for batch to execute before triggering reset
  2017-08-07 12:36 [PATCH igt 1/2] igt/gem_exec_capture: Wait for batch to execute before triggering reset Chris Wilson
  2017-08-07 12:36 ` [PATCH igt 2/2] lib: Remove illegal instructions from hang injection Chris Wilson
@ 2017-08-07 13:13 ` Patchwork
  1 sibling, 0 replies; 5+ messages in thread
From: Patchwork @ 2017-08-07 13:13 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [Intel-gfx,1/2] igt/gem_exec_capture: Wait for batch to execute before triggering reset
URL   : https://patchwork.freedesktop.org/series/28452/
State : success

== Summary ==

IGT patchset tested on top of latest successful build
79d6f77fa1ff33f198d954a3c7f1028322fcce52 tests/perf: follow up build fix

with latest DRM-Tip kernel build CI_DRM_2929
96c5eac5f202 drm-tip: 2017y-08m-07d-10h-55m-52s UTC integration manifest

Test gem_exec_parallel:
        Subgroup basic:
                fail       -> PASS       (fi-ilk-650) fdo#101735
Test gem_ringfill:
        Subgroup basic-default:
                skip       -> PASS       (fi-bsw-n3050) fdo#101915

fdo#101735 https://bugs.freedesktop.org/show_bug.cgi?id=101735
fdo#101915 https://bugs.freedesktop.org/show_bug.cgi?id=101915

fi-bdw-5557u     total:279  pass:268  dwarn:0   dfail:0   fail:0   skip:11  time:438s
fi-bdw-gvtdvm    total:279  pass:265  dwarn:0   dfail:0   fail:0   skip:14  time:423s
fi-blb-e6850     total:279  pass:224  dwarn:1   dfail:0   fail:0   skip:54  time:361s
fi-bsw-n3050     total:279  pass:243  dwarn:0   dfail:0   fail:0   skip:36  time:487s
fi-bxt-j4205     total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:491s
fi-byt-j1900     total:279  pass:254  dwarn:1   dfail:0   fail:0   skip:24  time:526s
fi-byt-n2820     total:279  pass:250  dwarn:1   dfail:0   fail:0   skip:28  time:511s
fi-glk-2a        total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:583s
fi-hsw-4770      total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:424s
fi-hsw-4770r     total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:404s
fi-ilk-650       total:279  pass:229  dwarn:0   dfail:0   fail:0   skip:50  time:421s
fi-ivb-3520m     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:506s
fi-ivb-3770      total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:482s
fi-kbl-7500u     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:463s
fi-kbl-7560u     total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:567s
fi-kbl-r         total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:579s
fi-pnv-d510      total:279  pass:223  dwarn:1   dfail:0   fail:0   skip:55  time:571s
fi-skl-6260u     total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:446s
fi-skl-6700k     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:642s
fi-skl-6770hq    total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:470s
fi-skl-gvtdvm    total:279  pass:266  dwarn:0   dfail:0   fail:0   skip:13  time:424s
fi-skl-x1585l    total:279  pass:268  dwarn:0   dfail:0   fail:0   skip:11  time:472s
fi-snb-2520m     total:279  pass:251  dwarn:0   dfail:0   fail:0   skip:28  time:547s
fi-snb-2600      total:279  pass:250  dwarn:0   dfail:0   fail:0   skip:29  time:417s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_27/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH igt 2/2] lib: Remove illegal instructions from hang injection
  2017-08-07 12:36 ` [PATCH igt 2/2] lib: Remove illegal instructions from hang injection Chris Wilson
@ 2017-08-07 13:33   ` Mika Kuoppala
  2017-08-08 13:25     ` Daniel Vetter
  0 siblings, 1 reply; 5+ messages in thread
From: Mika Kuoppala @ 2017-08-07 13:33 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: daniel.vetter

Chris Wilson <chris@chris-wilson.co.uk> writes:

> The idea behind using an illegal instruction was to hang the GPU much
> faster than simply using the recursive batch. However, we stopped doing
> so on gen8+ as the CS parser was much laxer and allowed the illegal
> command through but still interpreted the packet length (jumping over
> the recursive batch buffer start that followed). Sandybridge doesn't
> just hang the GPU when it encounters an illegal command on the BLT
> engine, it hangs the machine. That goes above and beyond testing our
> hangcheck + reset, so remove the deadly instructions.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  lib/igt_gt.c | 25 +++++--------------------
>  1 file changed, 5 insertions(+), 20 deletions(-)
>
> diff --git a/lib/igt_gt.c b/lib/igt_gt.c
> index 6f7daa5e..d5e8b557 100644
> --- a/lib/igt_gt.c
> +++ b/lib/igt_gt.c
> @@ -270,30 +270,15 @@ igt_hang_t igt_hang_ctx(int fd,
>  
>  	memset(b, 0xc5, sizeof(b));
>  
> -	/*
> -	 * We emit invalid command to provoke a gpu hang.
> -	 * If that doesn't work, we do bb start loop.
> -	 * Note that the bb start aligment is illegal due this.
> -	 * But hey, we are here to hang the gpu so whatever works.
> -	 * We skip 0xfffffff on gen9 as it confuses hw in an such a way that
> -	 * it will skip over the bb start, causing runaway head and
> -	 * thus much slower hang detection.
> -	 */

Daydreaming about MI_HALT,

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

>  	len = 2;
> -	if (intel_gen(intel_get_drm_devid(fd)) >= 8) {
> -		b[0] = MI_NOOP;
> +	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
>  		len++;
> -	} else {
> -		b[0] = 0xffffffff;
> -	}
> -
> -	b[1] = MI_BATCH_BUFFER_START | (len - 2);
> -	b[1+len] = MI_BATCH_BUFFER_END;
> -	b[2+len] = MI_NOOP;
> +	b[0] = MI_BATCH_BUFFER_START | (len - 2);
> +	b[len] = MI_BATCH_BUFFER_END;
> +	b[len+1] = MI_NOOP;
>  	gem_write(fd, exec.handle, 0, b, sizeof(b));
>  
> -	reloc.offset = 8;
> -	reloc.delta = 4;
> +	reloc.offset = sizeof(uint32_t);
>  	reloc.target_handle = exec.handle;
>  	reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
>  
> -- 
> 2.13.3
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH igt 2/2] lib: Remove illegal instructions from hang injection
  2017-08-07 13:33   ` Mika Kuoppala
@ 2017-08-08 13:25     ` Daniel Vetter
  0 siblings, 0 replies; 5+ messages in thread
From: Daniel Vetter @ 2017-08-08 13:25 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: daniel.vetter, intel-gfx

On Mon, Aug 07, 2017 at 04:33:40PM +0300, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > The idea behind using an illegal instruction was to hang the GPU much
> > faster than simply using the recursive batch. However, we stopped doing
> > so on gen8+ as the CS parser was much laxer and allowed the illegal
> > command through but still interpreted the packet length (jumping over
> > the recursive batch buffer start that followed). Sandybridge doesn't
> > just hang the GPU when it encounters an illegal command on the BLT
> > engine, it hangs the machine. That goes above and beyond testing our
> > hangcheck + reset, so remove the deadly instructions.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  lib/igt_gt.c | 25 +++++--------------------
> >  1 file changed, 5 insertions(+), 20 deletions(-)
> >
> > diff --git a/lib/igt_gt.c b/lib/igt_gt.c
> > index 6f7daa5e..d5e8b557 100644
> > --- a/lib/igt_gt.c
> > +++ b/lib/igt_gt.c
> > @@ -270,30 +270,15 @@ igt_hang_t igt_hang_ctx(int fd,
> >  
> >  	memset(b, 0xc5, sizeof(b));
> >  
> > -	/*
> > -	 * We emit invalid command to provoke a gpu hang.
> > -	 * If that doesn't work, we do bb start loop.
> > -	 * Note that the bb start aligment is illegal due this.
> > -	 * But hey, we are here to hang the gpu so whatever works.
> > -	 * We skip 0xfffffff on gen9 as it confuses hw in an such a way that
> > -	 * it will skip over the bb start, causing runaway head and
> > -	 * thus much slower hang detection.
> > -	 */
> 
> Daydreaming about MI_HALT,
> 
> Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

I tested both on my snb for a few hours, works solid. I guess I botched
the job when I tried this conversion, resulting in a gpu that couldn't
reset somehow.

Both patches pushed to igt, thanks a lot.
-Daniel

> 
> >  	len = 2;
> > -	if (intel_gen(intel_get_drm_devid(fd)) >= 8) {
> > -		b[0] = MI_NOOP;
> > +	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
> >  		len++;
> > -	} else {
> > -		b[0] = 0xffffffff;
> > -	}
> > -
> > -	b[1] = MI_BATCH_BUFFER_START | (len - 2);
> > -	b[1+len] = MI_BATCH_BUFFER_END;
> > -	b[2+len] = MI_NOOP;
> > +	b[0] = MI_BATCH_BUFFER_START | (len - 2);
> > +	b[len] = MI_BATCH_BUFFER_END;
> > +	b[len+1] = MI_NOOP;
> >  	gem_write(fd, exec.handle, 0, b, sizeof(b));
> >  
> > -	reloc.offset = 8;
> > -	reloc.delta = 4;
> > +	reloc.offset = sizeof(uint32_t);
> >  	reloc.target_handle = exec.handle;
> >  	reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
> >  
> > -- 
> > 2.13.3
> >
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2017-08-08 13:25 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-07 12:36 [PATCH igt 1/2] igt/gem_exec_capture: Wait for batch to execute before triggering reset Chris Wilson
2017-08-07 12:36 ` [PATCH igt 2/2] lib: Remove illegal instructions from hang injection Chris Wilson
2017-08-07 13:33   ` Mika Kuoppala
2017-08-08 13:25     ` Daniel Vetter
2017-08-07 13:13 ` ✓ Fi.CI.BAT: success for series starting with [Intel-gfx,1/2] igt/gem_exec_capture: Wait for batch to execute before triggering reset Patchwork

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox