* [PATCH 1/4] drm/i915/selftests: Include the trace as a debug aide
@ 2018-03-22 7:35 Chris Wilson
2018-03-22 7:35 ` [PATCH 2/4] drm/i915/selftests: Stress resets-vs-request-priority Chris Wilson
` (8 more replies)
0 siblings, 9 replies; 19+ messages in thread
From: Chris Wilson @ 2018-03-22 7:35 UTC (permalink / raw)
To: intel-gfx
If we fail to reset the GPU in a timely fashion, dump the GEM trace so
that we can see what operations were in flight when the GPU got stuck.
v2: There's more than one timeout that deserves tracing!
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 23 ++++++++++++++++++++---
1 file changed, 20 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 4372826998aa..1969a65072ca 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -260,8 +260,11 @@ static void wedge_me(struct work_struct *work)
{
struct wedge_me *w = container_of(work, typeof(*w), work.work);
- pr_err("%pS timed out, cancelling all further testing.\n",
- w->symbol);
+ pr_err("%pS timed out, cancelling all further testing.\n", w->symbol);
+
+ GEM_TRACE("%pS timed out.\n", w->symbol);
+ GEM_TRACE_DUMP();
+
i915_gem_set_wedged(w->i915);
}
@@ -621,9 +624,19 @@ static int active_engine(void *data)
mutex_unlock(&engine->i915->drm.struct_mutex);
if (old) {
- i915_request_wait(old, 0, MAX_SCHEDULE_TIMEOUT);
+ if (i915_request_wait(old, 0, 10*HZ) < 0) {
+ GEM_TRACE("%s timed out.\n", engine->name);
+ GEM_TRACE_DUMP();
+
+ i915_gem_set_wedged(engine->i915);
+ i915_request_put(old);
+ err = -EIO;
+ break;
+ }
i915_request_put(old);
}
+
+ cond_resched();
}
for (count = 0; count < ARRAY_SIZE(rq); count++)
@@ -1126,6 +1139,10 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
err = i915_subtests(tests, i915);
+ mutex_lock(&i915->drm.struct_mutex);
+ flush_test(i915, I915_WAIT_LOCKED);
+ mutex_unlock(&i915->drm.struct_mutex);
+
i915_modparams.enable_hangcheck = saved_hangcheck;
intel_runtime_pm_put(i915);
--
2.16.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 2/4] drm/i915/selftests: Stress resets-vs-request-priority 2018-03-22 7:35 [PATCH 1/4] drm/i915/selftests: Include the trace as a debug aide Chris Wilson @ 2018-03-22 7:35 ` Chris Wilson 2018-03-22 7:35 ` [PATCH 3/4] drm/i915: Use full serialisation around engine->irq_posted Chris Wilson ` (7 subsequent siblings) 8 siblings, 0 replies; 19+ messages in thread From: Chris Wilson @ 2018-03-22 7:35 UTC (permalink / raw) To: intel-gfx Watch what happens if we try to reset with a queue of requests with varying priorities -- that may need reordering or preemption across the reset. v2: Tweak priorities to avoid starving the hanging thread. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 189 +++++++++++++++-------- 1 file changed, 126 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 1969a65072ca..c5ed4006c319 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -25,6 +25,7 @@ #include <linux/kthread.h> #include "../i915_selftest.h" +#include "i915_random.h" #include "mock_context.h" #include "mock_drm.h" @@ -486,6 +487,8 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { + u32 seqno = intel_engine_get_seqno(engine); + if (active) { struct i915_request *rq; @@ -514,12 +517,13 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } + GEM_BUG_ON(!rq->global_seqno); + seqno = rq->global_seqno - 1; i915_request_put(rq); } engine->hangcheck.stalled = true; - engine->hangcheck.seqno = - intel_engine_get_seqno(engine); + engine->hangcheck.seqno = seqno; err = i915_reset_engine(engine, NULL); if (err) { @@ -576,11 +580,25 @@ static int igt_reset_active_engine(void 
*arg) return __igt_reset_engine(arg, true); } +struct active_engine { + struct task_struct *task; + struct intel_engine_cs *engine; + unsigned long resets; + unsigned int flags; +}; + +#define TEST_ACTIVE BIT(0) +#define TEST_OTHERS BIT(1) +#define TEST_SELF BIT(2) +#define TEST_PRIORITY BIT(3) + static int active_engine(void *data) { - struct intel_engine_cs *engine = data; - struct i915_request *rq[2] = {}; - struct i915_gem_context *ctx[2]; + I915_RND_STATE(prng); + struct active_engine *arg = data; + struct intel_engine_cs *engine = arg->engine; + struct i915_request *rq[8] = {}; + struct i915_gem_context *ctx[ARRAY_SIZE(rq)]; struct drm_file *file; unsigned long count = 0; int err = 0; @@ -589,25 +607,20 @@ static int active_engine(void *data) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&engine->i915->drm.struct_mutex); - ctx[0] = live_context(engine->i915, file); - mutex_unlock(&engine->i915->drm.struct_mutex); - if (IS_ERR(ctx[0])) { - err = PTR_ERR(ctx[0]); - goto err_file; - } - - mutex_lock(&engine->i915->drm.struct_mutex); - ctx[1] = live_context(engine->i915, file); - mutex_unlock(&engine->i915->drm.struct_mutex); - if (IS_ERR(ctx[1])) { - err = PTR_ERR(ctx[1]); - i915_gem_context_put(ctx[0]); - goto err_file; + for (count = 0; count < ARRAY_SIZE(ctx); count++) { + mutex_lock(&engine->i915->drm.struct_mutex); + ctx[count] = live_context(engine->i915, file); + mutex_unlock(&engine->i915->drm.struct_mutex); + if (IS_ERR(ctx[count])) { + err = PTR_ERR(ctx[count]); + while (--count) + i915_gem_context_put(ctx[count]); + goto err_file; + } } while (!kthread_should_stop()) { - unsigned int idx = count++ & 1; + unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1); struct i915_request *old = rq[idx]; struct i915_request *new; @@ -619,6 +632,10 @@ static int active_engine(void *data) break; } + if (arg->flags & TEST_PRIORITY) + ctx[idx]->priority = + i915_prandom_u32_max_state(512, &prng); + rq[idx] = i915_request_get(new); i915_request_add(new); 
mutex_unlock(&engine->i915->drm.struct_mutex); @@ -647,8 +664,9 @@ static int active_engine(void *data) return err; } -static int __igt_reset_engine_others(struct drm_i915_private *i915, - bool active) +static int __igt_reset_engines(struct drm_i915_private *i915, + const char *test_name, + unsigned int flags) { struct intel_engine_cs *engine, *other; enum intel_engine_id id, tmp; @@ -662,50 +680,61 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, if (!intel_has_reset_engine(i915)) return 0; - if (active) { + if (flags & TEST_ACTIVE) { mutex_lock(&i915->drm.struct_mutex); err = hang_init(&h, i915); mutex_unlock(&i915->drm.struct_mutex); if (err) return err; + + if (flags & TEST_PRIORITY) + h.ctx->priority = 1024; } for_each_engine(engine, i915, id) { - struct task_struct *threads[I915_NUM_ENGINES] = {}; - unsigned long resets[I915_NUM_ENGINES]; + struct active_engine threads[I915_NUM_ENGINES] = {}; unsigned long global = i915_reset_count(&i915->gpu_error); - unsigned long count = 0; + unsigned long count = 0, reported; IGT_TIMEOUT(end_time); - if (active && !intel_engine_can_store_dword(engine)) + if (flags & TEST_ACTIVE && + !intel_engine_can_store_dword(engine)) continue; memset(threads, 0, sizeof(threads)); for_each_engine(other, i915, tmp) { struct task_struct *tsk; - resets[tmp] = i915_reset_engine_count(&i915->gpu_error, - other); + threads[tmp].resets = + i915_reset_engine_count(&i915->gpu_error, + other); - if (other == engine) + if (!(flags & TEST_OTHERS)) continue; - tsk = kthread_run(active_engine, other, + if (other == engine && !(flags & TEST_SELF)) + continue; + + threads[tmp].engine = other; + threads[tmp].flags = flags; + + tsk = kthread_run(active_engine, &threads[tmp], "igt/%s", other->name); if (IS_ERR(tsk)) { err = PTR_ERR(tsk); goto unwind; } - threads[tmp] = tsk; + threads[tmp].task = tsk; get_task_struct(tsk); } set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { - if (active) { - struct i915_request *rq; + u32 
seqno = intel_engine_get_seqno(engine); + struct i915_request *rq = NULL; + if (flags & TEST_ACTIVE) { mutex_lock(&i915->drm.struct_mutex); rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { @@ -731,33 +760,38 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, break; } - i915_request_put(rq); + GEM_BUG_ON(!rq->global_seqno); + seqno = rq->global_seqno - 1; } engine->hangcheck.stalled = true; - engine->hangcheck.seqno = - intel_engine_get_seqno(engine); + engine->hangcheck.seqno = seqno; err = i915_reset_engine(engine, NULL); if (err) { - pr_err("i915_reset_engine(%s:%s) failed, err=%d\n", - engine->name, active ? "active" : "idle", err); + pr_err("i915_reset_engine(%s:%s): failed, err=%d\n", + engine->name, test_name, err); break; } engine->hangcheck.stalled = false; count++; + + if (rq) { + i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); pr_info("i915_reset_engine(%s:%s): %lu resets\n", - engine->name, active ? "active" : "idle", count); - - if (i915_reset_engine_count(&i915->gpu_error, engine) - - resets[engine->id] != (active ? count : 0)) { - pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", - engine->name, active ? "active" : "idle", count, - i915_reset_engine_count(&i915->gpu_error, - engine) - resets[engine->id]); + engine->name, test_name, count); + + reported = i915_reset_engine_count(&i915->gpu_error, engine); + reported -= threads[engine->id].resets; + if (reported != (flags & TEST_ACTIVE ? count : 0)) { + pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu, expected %lu reported\n", + engine->name, test_name, count, reported, + (flags & TEST_ACTIVE ? 
count : 0)); if (!err) err = -EINVAL; } @@ -766,24 +800,26 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, for_each_engine(other, i915, tmp) { int ret; - if (!threads[tmp]) + if (!threads[tmp].task) continue; - ret = kthread_stop(threads[tmp]); + ret = kthread_stop(threads[tmp].task); if (ret) { pr_err("kthread for other engine %s failed, err=%d\n", other->name, ret); if (!err) err = ret; } - put_task_struct(threads[tmp]); + put_task_struct(threads[tmp].task); - if (resets[tmp] != i915_reset_engine_count(&i915->gpu_error, - other)) { + if (other != engine && + threads[tmp].resets != + i915_reset_engine_count(&i915->gpu_error, other)) { pr_err("Innocent engine %s was reset (count=%ld)\n", other->name, i915_reset_engine_count(&i915->gpu_error, - other) - resets[tmp]); + other) - + threads[tmp].resets); if (!err) err = -EINVAL; } @@ -807,7 +843,7 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, if (i915_terminally_wedged(&i915->gpu_error)) err = -EIO; - if (active) { + if (flags & TEST_ACTIVE) { mutex_lock(&i915->drm.struct_mutex); hang_fini(&h); mutex_unlock(&i915->drm.struct_mutex); @@ -816,14 +852,42 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, return err; } -static int igt_reset_idle_engine_others(void *arg) +static int igt_reset_engines(void *arg) { - return __igt_reset_engine_others(arg, false); -} + static const struct { + const char *name; + unsigned int flags; + } phases[] = { + { "idle", 0 }, + { "active", TEST_ACTIVE }, + { "others-idle", TEST_OTHERS }, + { "others-active", TEST_OTHERS | TEST_ACTIVE }, + { + "others-priority", + TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY + }, + { + "self-priority", + TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY | TEST_SELF, + }, + { } + }; + struct drm_i915_private *i915 = arg; + typeof(*phases) *p; + int err; -static int igt_reset_active_engine_others(void *arg) -{ - return __igt_reset_engine_others(arg, true); + for (p = phases; p->name; p++) { + if 
(p->flags & TEST_PRIORITY) { + if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) + continue; + } + + err = __igt_reset_engines(arg, p->name, p->flags); + if (err) + return err; + } + + return 0; } static u32 fake_hangcheck(struct i915_request *rq) @@ -1122,8 +1186,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_hang_sanitycheck), SUBTEST(igt_reset_idle_engine), SUBTEST(igt_reset_active_engine), - SUBTEST(igt_reset_idle_engine_others), - SUBTEST(igt_reset_active_engine_others), + SUBTEST(igt_reset_engines), SUBTEST(igt_wait_reset), SUBTEST(igt_reset_queue), SUBTEST(igt_handle_error), -- 2.16.2 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH 3/4] drm/i915: Use full serialisation around engine->irq_posted 2018-03-22 7:35 [PATCH 1/4] drm/i915/selftests: Include the trace as a debug aide Chris Wilson 2018-03-22 7:35 ` [PATCH 2/4] drm/i915/selftests: Stress resets-vs-request-priority Chris Wilson @ 2018-03-22 7:35 ` Chris Wilson 2018-03-22 14:35 ` Mika Kuoppala ` (2 more replies) 2018-03-22 7:35 ` [PATCH 4/4] drm/i915: Flush pending interrupt following a GPU reset Chris Wilson ` (6 subsequent siblings) 8 siblings, 3 replies; 19+ messages in thread From: Chris Wilson @ 2018-03-22 7:35 UTC (permalink / raw) To: intel-gfx Using engine->irq_posted for execlists, we are not always serialised by the tasklet as we supposed. On the reset paths, the tasklet is disabled and ignored. Instead, we manipulate the engine->irq_posted directly to account for the reset, but if an interrupt fired before the reset and so wrote to engine->irq_posted, that write may not be flushed from the local CPU's cacheline until much later as the tasklet is already active and so does not generate a mb(). To correctly serialise the interrupt with reset, we need serialisation on the set_bit() itself. And at last Mika can be happy. 
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Cc: Michał Winiarski <michal.winiarski@intel.com> CC: Michel Thierry <michel.thierry@intel.com> Cc: Jeff McGee <jeff.mcgee@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> --- drivers/gpu/drm/i915/i915_irq.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index fa7310766217..27aee25429b7 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1405,10 +1405,9 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) bool tasklet = false; if (iir & GT_CONTEXT_SWITCH_INTERRUPT) { - if (READ_ONCE(engine->execlists.active)) { - __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - tasklet = true; - } + if (READ_ONCE(engine->execlists.active)) + tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST, + &engine->irq_posted); } if (iir & GT_RENDER_USER_INTERRUPT) { -- 2.16.2 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH 3/4] drm/i915: Use full serialisation around engine->irq_posted 2018-03-22 7:35 ` [PATCH 3/4] drm/i915: Use full serialisation around engine->irq_posted Chris Wilson @ 2018-03-22 14:35 ` Mika Kuoppala 2018-03-22 15:34 ` Jeff McGee 2018-03-30 23:08 ` Chris Wilson 2 siblings, 0 replies; 19+ messages in thread From: Mika Kuoppala @ 2018-03-22 14:35 UTC (permalink / raw) To: Chris Wilson, intel-gfx Chris Wilson <chris@chris-wilson.co.uk> writes: > Using engine->irq_posted for execlists, we are not always serialised by > the tasklet as we supposed. On the reset paths, the tasklet is disabled > and ignored. Instead, we manipulate the engine->irq_posted directly to > account for the reset, but if an interrupt fired before the reset and so > wrote to engine->irq_posted, that write may not be flushed from the > local CPU's cacheline until much later as the tasklet is already active > and so does not generate a mb(). To correctly serialise the interrupt > with reset, we need serialisation on the set_bit() itself. > > And at last Mika can be happy. Yes. 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> > Cc: Michał Winiarski <michal.winiarski@intel.com> > CC: Michel Thierry <michel.thierry@intel.com> > Cc: Jeff McGee <jeff.mcgee@intel.com> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> > --- > drivers/gpu/drm/i915/i915_irq.c | 7 +++---- > 1 file changed, 3 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index fa7310766217..27aee25429b7 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -1405,10 +1405,9 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) > bool tasklet = false; > > if (iir & GT_CONTEXT_SWITCH_INTERRUPT) { > - if (READ_ONCE(engine->execlists.active)) { > - __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); > - tasklet = true; > - } > + if (READ_ONCE(engine->execlists.active)) > + tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST, > + &engine->irq_posted); > } > > if (iir & GT_RENDER_USER_INTERRUPT) { > -- > 2.16.2 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 3/4] drm/i915: Use full serialisation around engine->irq_posted 2018-03-22 7:35 ` [PATCH 3/4] drm/i915: Use full serialisation around engine->irq_posted Chris Wilson 2018-03-22 14:35 ` Mika Kuoppala @ 2018-03-22 15:34 ` Jeff McGee 2018-03-22 17:01 ` Chris Wilson 2018-03-30 23:08 ` Chris Wilson 2 siblings, 1 reply; 19+ messages in thread From: Jeff McGee @ 2018-03-22 15:34 UTC (permalink / raw) To: Chris Wilson; +Cc: intel-gfx On Thu, Mar 22, 2018 at 07:35:32AM +0000, Chris Wilson wrote: > Using engine->irq_posted for execlists, we are not always serialised by > the tasklet as we supposed. On the reset paths, the tasklet is disabled > and ignored. Instead, we manipulate the engine->irq_posted directly to > account for the reset, but if an interrupt fired before the reset and so > wrote to engine->irq_posted, that write may not be flushed from the > local CPU's cacheline until much later as the tasklet is already active > and so does not generate a mb(). To correctly serialise the interrupt > with reset, we need serialisation on the set_bit() itself. > > And at last Mika can be happy. 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> > Cc: Michał Winiarski <michal.winiarski@intel.com> > CC: Michel Thierry <michel.thierry@intel.com> > Cc: Jeff McGee <jeff.mcgee@intel.com> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > drivers/gpu/drm/i915/i915_irq.c | 7 +++---- > 1 file changed, 3 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index fa7310766217..27aee25429b7 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -1405,10 +1405,9 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) > bool tasklet = false; > > if (iir & GT_CONTEXT_SWITCH_INTERRUPT) { > - if (READ_ONCE(engine->execlists.active)) { > - __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); > - tasklet = true; > - } > + if (READ_ONCE(engine->execlists.active)) > + tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST, > + &engine->irq_posted); > } > > if (iir & GT_RENDER_USER_INTERRUPT) { > -- > 2.16.2 > Confirmed that this along with the interrupt flush eliminates the cases of finding CSB tail at its reset value (0x7) in the tasklet in my force preemption tests. Reviewed-by: Jeff McGee <jeff.mcgee@intel.com> _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 3/4] drm/i915: Use full serialisation around engine->irq_posted 2018-03-22 15:34 ` Jeff McGee @ 2018-03-22 17:01 ` Chris Wilson 0 siblings, 0 replies; 19+ messages in thread From: Chris Wilson @ 2018-03-22 17:01 UTC (permalink / raw) To: Jeff McGee; +Cc: intel-gfx, Mika Quoting Jeff McGee (2018-03-22 15:34:45) > On Thu, Mar 22, 2018 at 07:35:32AM +0000, Chris Wilson wrote: > > Using engine->irq_posted for execlists, we are not always serialised by > > the tasklet as we supposed. On the reset paths, the tasklet is disabled > > and ignored. Instead, we manipulate the engine->irq_posted directly to > > account for the reset, but if an interrupt fired before the reset and so > > wrote to engine->irq_posted, that write may not be flushed from the > > local CPU's cacheline until much later as the tasklet is already active > > and so does not generate a mb(). To correctly serialise the interrupt > > with reset, we need serialisation on the set_bit() itself. > > > > And at last Mika can be happy. 
> > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> > > Cc: Michał Winiarski <michal.winiarski@intel.com> > > CC: Michel Thierry <michel.thierry@intel.com> > > Cc: Jeff McGee <jeff.mcgee@intel.com> > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > --- > > drivers/gpu/drm/i915/i915_irq.c | 7 +++---- > > 1 file changed, 3 insertions(+), 4 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > > index fa7310766217..27aee25429b7 100644 > > --- a/drivers/gpu/drm/i915/i915_irq.c > > +++ b/drivers/gpu/drm/i915/i915_irq.c > > @@ -1405,10 +1405,9 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) > > bool tasklet = false; > > > > if (iir & GT_CONTEXT_SWITCH_INTERRUPT) { > > - if (READ_ONCE(engine->execlists.active)) { > > - __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); > > - tasklet = true; > > - } > > + if (READ_ONCE(engine->execlists.active)) > > + tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST, > > + &engine->irq_posted); > > } > > > > if (iir & GT_RENDER_USER_INTERRUPT) { > > -- > > 2.16.2 > > > > Confirmed that this along with the interrupt flush eliminates the cases > of finding CSB tail at its reset value (0x7) in the tasklet in my force > preemption tests. At the moment, I'm concerned about the failures we have in CI before we go building on top. So care to complete the set of r-b for us to move on? -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 3/4] drm/i915: Use full serialisation around engine->irq_posted 2018-03-22 7:35 ` [PATCH 3/4] drm/i915: Use full serialisation around engine->irq_posted Chris Wilson 2018-03-22 14:35 ` Mika Kuoppala 2018-03-22 15:34 ` Jeff McGee @ 2018-03-30 23:08 ` Chris Wilson 2018-03-31 8:59 ` Chris Wilson 2 siblings, 1 reply; 19+ messages in thread From: Chris Wilson @ 2018-03-30 23:08 UTC (permalink / raw) To: intel-gfx; +Cc: Mika Quoting Chris Wilson (2018-03-22 07:35:32) > Using engine->irq_posted for execlists, we are not always serialised by > the tasklet as we supposed. On the reset paths, the tasklet is disabled > and ignored. Instead, we manipulate the engine->irq_posted directly to > account for the reset, but if an interrupt fired before the reset and so > wrote to engine->irq_posted, that write may not be flushed from the > local CPU's cacheline until much later as the tasklet is already active > and so does not generate a mb(). To correctly serialise the interrupt > with reset, we need serialisation on the set_bit() itself. > > And at last Mika can be happy. 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> > Cc: Michał Winiarski <michal.winiarski@intel.com> > CC: Michel Thierry <michel.thierry@intel.com> > Cc: Jeff McGee <jeff.mcgee@intel.com> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > drivers/gpu/drm/i915/i915_irq.c | 7 +++---- > 1 file changed, 3 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index fa7310766217..27aee25429b7 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -1405,10 +1405,9 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) > bool tasklet = false; > > if (iir & GT_CONTEXT_SWITCH_INTERRUPT) { > - if (READ_ONCE(engine->execlists.active)) { > - __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); > - tasklet = true; > - } > + if (READ_ONCE(engine->execlists.active)) > + tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST, > + &engine->irq_posted); This is driving me mad. A very rare missed interrupt unless we unconditionally kick tasklet: if (iir & GT_CONTEXT_SWITCH_INTERRUPT) { - if (READ_ONCE(engine->execlists.active)) - tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST, - &engine->irq_posted); + if (READ_ONCE(engine->execlists.active)) { + set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + tasklet = true; + } } I can't see why. Hmm, I wonder if we are seeing READ_ONCE(execlsts->active) false negatives. Getting close to admitting defeat :( -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 3/4] drm/i915: Use full serialisation around engine->irq_posted 2018-03-30 23:08 ` Chris Wilson @ 2018-03-31 8:59 ` Chris Wilson 0 siblings, 0 replies; 19+ messages in thread From: Chris Wilson @ 2018-03-31 8:59 UTC (permalink / raw) To: intel-gfx Quoting Chris Wilson (2018-03-31 00:08:47) > Quoting Chris Wilson (2018-03-22 07:35:32) > > Using engine->irq_posted for execlists, we are not always serialised by > > the tasklet as we supposed. On the reset paths, the tasklet is disabled > > and ignored. Instead, we manipulate the engine->irq_posted directly to > > account for the reset, but if an interrupt fired before the reset and so > > wrote to engine->irq_posted, that write may not be flushed from the > > local CPU's cacheline until much later as the tasklet is already active > > and so does not generate a mb(). To correctly serialise the interrupt > > with reset, we need serialisation on the set_bit() itself. > > > > And at last Mika can be happy. > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> > > Cc: Michał Winiarski <michal.winiarski@intel.com> > > CC: Michel Thierry <michel.thierry@intel.com> > > Cc: Jeff McGee <jeff.mcgee@intel.com> > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > --- > > drivers/gpu/drm/i915/i915_irq.c | 7 +++---- > > 1 file changed, 3 insertions(+), 4 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > > index fa7310766217..27aee25429b7 100644 > > --- a/drivers/gpu/drm/i915/i915_irq.c > > +++ b/drivers/gpu/drm/i915/i915_irq.c > > @@ -1405,10 +1405,9 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) > > bool tasklet = false; > > > > if (iir & GT_CONTEXT_SWITCH_INTERRUPT) { > > - if (READ_ONCE(engine->execlists.active)) { > > - __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); > > - tasklet = true; > > - } > > + if (READ_ONCE(engine->execlists.active)) > > + tasklet = 
!test_and_set_bit(ENGINE_IRQ_EXECLIST, > > + &engine->irq_posted); > > This is driving me mad. A very rare missed interrupt unless we > unconditionally kick tasklet: > > if (iir & GT_CONTEXT_SWITCH_INTERRUPT) { > - if (READ_ONCE(engine->execlists.active)) > - tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST, > - &engine->irq_posted); > + if (READ_ONCE(engine->execlists.active)) { > + set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); > + tasklet = true; > + } > } > > I can't see why. > > Hmm, I wonder if we are seeing READ_ONCE(execlsts->active) false > negatives. Fortunately, doesn't appear to be that. @@ -1405,9 +1405,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) bool tasklet = false; if (iir & GT_CONTEXT_SWITCH_INTERRUPT) { - if (READ_ONCE(engine->execlists.active)) - tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST, - &engine->irq_posted); + GEM_BUG_ON(!READ_ONCE(execlists->tasklet.state) && + test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)); + tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST, + &engine->irq_posted); } Hasn't even hit a BUG, which is a little disconcerting. -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH 4/4] drm/i915: Flush pending interrupt following a GPU reset 2018-03-22 7:35 [PATCH 1/4] drm/i915/selftests: Include the trace as a debug aide Chris Wilson 2018-03-22 7:35 ` [PATCH 2/4] drm/i915/selftests: Stress resets-vs-request-priority Chris Wilson 2018-03-22 7:35 ` [PATCH 3/4] drm/i915: Use full serialisation around engine->irq_posted Chris Wilson @ 2018-03-22 7:35 ` Chris Wilson 2018-03-22 7:43 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/4] drm/i915/selftests: Include the trace as a debug aide Patchwork ` (5 subsequent siblings) 8 siblings, 0 replies; 19+ messages in thread From: Chris Wilson @ 2018-03-22 7:35 UTC (permalink / raw) To: intel-gfx After resetting the GPU (or subset of engines), call synchronize_irq() to flush any pending irq before proceeding with the cleanup. For a device level reset, we disable the interrupts around the reset, but when resetting just one engine, we have to avoid such global disabling. This leaves us open to an interrupt arriving for the engine as we try to reset it. We already do try to flush the IIR following the reset, but we have to ensure that the in-flight interrupt does not land after we start cleaning up after the reset; enter synchronize_irq(). As it currently stands, we very rarely, but fatally, see sequences such as: 2.... 57964564us : execlists_reset_prepare: rcs0 2.... 57964613us : execlists_reset: rcs0 seqno=424 0d.h1 57964615us : gen8_cs_irq_handler: rcs0 CS active=1 2d..1 57964617us : __i915_request_unsubmit: rcs0 fence 29:1056 <- global_seqno 1060 2.... 57964703us : execlists_reset_finish: rcs0 0..s. 57964705us : execlists_submission_tasklet: rcs0 awake?=1, active=0, irq-posted?=1 v2: Move the sync into the execlists reset handler so that we coordinate the flush with disabling the interrupt handling and canceling the pending interrupt. v3: Just use synchronize_hardirq() to avoid the might_sleep(), we do not yet have threaded-irq to worry about. 
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Cc: Michel Thierry <michel.thierry@intel.com> Cc: Michał Winiarski <michal.winiarski@intel.com> Cc: Jeff McGee <jeff.mcgee@intel.com> --- drivers/gpu/drm/i915/intel_lrc.c | 7 ++++--- drivers/gpu/drm/i915/intel_uncore.c | 4 +++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 67b6a0f658d6..ce09c5ad334f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -805,6 +805,10 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) spin_unlock(&engine->timeline->lock); + /* Mark all CS interrupts as complete */ + smp_store_mb(execlists->active, 0); + synchronize_hardirq(engine->i915->drm.irq); + /* * The port is checked prior to scheduling a tasklet, but * just in case we have suspended the tasklet to do the @@ -813,9 +817,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) */ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - /* Mark all CS interrupts as complete */ - execlists->active = 0; - local_irq_restore(flags); } diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 4c616d074a97..f37ecfc69e49 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -2116,8 +2116,10 @@ int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) i915_stop_engines(dev_priv, engine_mask); ret = -ENODEV; - if (reset) + if (reset) { + GEM_TRACE("engine_mask=%x\n", engine_mask); ret = reset(dev_priv, engine_mask); + } if (ret != -ETIMEDOUT) break; -- 2.16.2 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply related [flat|nested] 19+ messages in thread
* ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/4] drm/i915/selftests: Include the trace as a debug aide 2018-03-22 7:35 [PATCH 1/4] drm/i915/selftests: Include the trace as a debug aide Chris Wilson ` (2 preceding siblings ...) 2018-03-22 7:35 ` [PATCH 4/4] drm/i915: Flush pending interrupt following a GPU reset Chris Wilson @ 2018-03-22 7:43 ` Patchwork 2018-03-22 7:49 ` [PATCH v2] " Chris Wilson ` (4 subsequent siblings) 8 siblings, 0 replies; 19+ messages in thread From: Patchwork @ 2018-03-22 7:43 UTC (permalink / raw) To: Chris Wilson; +Cc: intel-gfx == Series Details == Series: series starting with [1/4] drm/i915/selftests: Include the trace as a debug aide URL : https://patchwork.freedesktop.org/series/40439/ State : warning == Summary == $ dim checkpatch origin/drm-tip 6adaf914c971 drm/i915/selftests: Include the trace as a debug aide -:36: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV) #36: FILE: drivers/gpu/drm/i915/selftests/intel_hangcheck.c:627: + if (i915_request_wait(old, 0, 10*HZ) < 0) { ^ total: 0 errors, 0 warnings, 1 checks, 43 lines checked aae1777597a8 drm/i915/selftests: Stress resets-vs-request-priority 10e13a64240a drm/i915: Use full serialisation around engine->irq_posted a41114e3c858 drm/i915: Flush pending interrupt following a GPU reset -:23: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line) #23: 2d..1 57964617us : __i915_request_unsubmit: rcs0 fence 29:1056 <- global_seqno 1060 total: 0 errors, 1 warnings, 0 checks, 30 lines checked _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v2] drm/i915/selftests: Include the trace as a debug aide 2018-03-22 7:35 [PATCH 1/4] drm/i915/selftests: Include the trace as a debug aide Chris Wilson ` (3 preceding siblings ...) 2018-03-22 7:43 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/4] drm/i915/selftests: Include the trace as a debug aide Patchwork @ 2018-03-22 7:49 ` Chris Wilson 2018-03-22 14:26 ` Mika Kuoppala 2018-03-22 7:58 ` ✓ Fi.CI.BAT: success for series starting with [1/4] " Patchwork ` (3 subsequent siblings) 8 siblings, 1 reply; 19+ messages in thread From: Chris Wilson @ 2018-03-22 7:49 UTC (permalink / raw) To: intel-gfx If we fail to reset the GPU in a timely fashion, dump the GEM trace so that we can see what operations were in flight when the GPU got stuck. v2: There's more than one timeout that deserves tracing! v3: Silence checkpatch by not even using a product at all! Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 4372826998aa..9b235dae8dd9 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -260,8 +260,11 @@ static void wedge_me(struct work_struct *work) { struct wedge_me *w = container_of(work, typeof(*w), work.work); - pr_err("%pS timed out, cancelling all further testing.\n", - w->symbol); + pr_err("%pS timed out, cancelling all further testing.\n", w->symbol); + + GEM_TRACE("%pS timed out.\n", w->symbol); + GEM_TRACE_DUMP(); + i915_gem_set_wedged(w->i915); } @@ -621,9 +624,19 @@ static int active_engine(void *data) mutex_unlock(&engine->i915->drm.struct_mutex); if (old) { - i915_request_wait(old, 0, MAX_SCHEDULE_TIMEOUT); + if (i915_request_wait(old, 0, HZ) < 0) { + GEM_TRACE("%s timed out.\n", engine->name); + GEM_TRACE_DUMP(); + + 
i915_gem_set_wedged(engine->i915); + i915_request_put(old); + err = -EIO; + break; + } i915_request_put(old); } + + cond_resched(); } for (count = 0; count < ARRAY_SIZE(rq); count++) @@ -1126,6 +1139,10 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) err = i915_subtests(tests, i915); + mutex_lock(&i915->drm.struct_mutex); + flush_test(i915, I915_WAIT_LOCKED); + mutex_unlock(&i915->drm.struct_mutex); + i915_modparams.enable_hangcheck = saved_hangcheck; intel_runtime_pm_put(i915); -- 2.16.2 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH v2] drm/i915/selftests: Include the trace as a debug aide 2018-03-22 7:49 ` [PATCH v2] " Chris Wilson @ 2018-03-22 14:26 ` Mika Kuoppala 2018-03-22 14:30 ` Chris Wilson 0 siblings, 1 reply; 19+ messages in thread From: Mika Kuoppala @ 2018-03-22 14:26 UTC (permalink / raw) To: Chris Wilson, intel-gfx Chris Wilson <chris@chris-wilson.co.uk> writes: > If we fail to reset the GPU in a timely fashion, dump the GEM trace so > that we can see what operations were in flight when the GPU got stuck. > > v2: There's more than one timeout that deserves tracing! > v3: Silence checkpatch by not even using a product at all! > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 23 ++++++++++++++++++++--- > 1 file changed, 20 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c > index 4372826998aa..9b235dae8dd9 100644 > --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c > +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c > @@ -260,8 +260,11 @@ static void wedge_me(struct work_struct *work) > { > struct wedge_me *w = container_of(work, typeof(*w), work.work); > > - pr_err("%pS timed out, cancelling all further testing.\n", > - w->symbol); > + pr_err("%pS timed out, cancelling all further testing.\n", w->symbol); > + > + GEM_TRACE("%pS timed out.\n", w->symbol); > + GEM_TRACE_DUMP(); > + > i915_gem_set_wedged(w->i915); > } > > @@ -621,9 +624,19 @@ static int active_engine(void *data) > mutex_unlock(&engine->i915->drm.struct_mutex); > > if (old) { > - i915_request_wait(old, 0, MAX_SCHEDULE_TIMEOUT); > + if (i915_request_wait(old, 0, HZ) < 0) { > + GEM_TRACE("%s timed out.\n", engine->name); > + GEM_TRACE_DUMP(); > + > + i915_gem_set_wedged(engine->i915); > + i915_request_put(old); > + err = -EIO; > + break; > + } Using err = i915_request_wait() could have saved one extra request_put but I dunno if it would 
be any cleaner. > i915_request_put(old); > } > + > + cond_resched(); To give more slack for other engines and main thread to proceed? > } > > for (count = 0; count < ARRAY_SIZE(rq); count++) > @@ -1126,6 +1139,10 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) > > err = i915_subtests(tests, i915); > > + mutex_lock(&i915->drm.struct_mutex); > + flush_test(i915, I915_WAIT_LOCKED); > + mutex_unlock(&i915->drm.struct_mutex); > + To wash out leftovers. Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> > i915_modparams.enable_hangcheck = saved_hangcheck; > intel_runtime_pm_put(i915); > > -- > 2.16.2 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v2] drm/i915/selftests: Include the trace as a debug aide 2018-03-22 14:26 ` Mika Kuoppala @ 2018-03-22 14:30 ` Chris Wilson 2018-03-22 19:29 ` Jeff McGee 0 siblings, 1 reply; 19+ messages in thread From: Chris Wilson @ 2018-03-22 14:30 UTC (permalink / raw) To: Mika Kuoppala, intel-gfx Quoting Mika Kuoppala (2018-03-22 14:26:41) > Chris Wilson <chris@chris-wilson.co.uk> writes: > > > If we fail to reset the GPU in a timely fashion, dump the GEM trace so > > that we can see what operations were in flight when the GPU got stuck. > > > > v2: There's more than one timeout that deserves tracing! > > v3: Silence checkpatch by not even using a product at all! > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > --- > > drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 23 ++++++++++++++++++++--- > > 1 file changed, 20 insertions(+), 3 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c > > index 4372826998aa..9b235dae8dd9 100644 > > --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c > > +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c > > @@ -260,8 +260,11 @@ static void wedge_me(struct work_struct *work) > > { > > struct wedge_me *w = container_of(work, typeof(*w), work.work); > > > > - pr_err("%pS timed out, cancelling all further testing.\n", > > - w->symbol); > > + pr_err("%pS timed out, cancelling all further testing.\n", w->symbol); > > + > > + GEM_TRACE("%pS timed out.\n", w->symbol); > > + GEM_TRACE_DUMP(); > > + > > i915_gem_set_wedged(w->i915); > > } > > > > @@ -621,9 +624,19 @@ static int active_engine(void *data) > > mutex_unlock(&engine->i915->drm.struct_mutex); > > > > if (old) { > > - i915_request_wait(old, 0, MAX_SCHEDULE_TIMEOUT); > > + if (i915_request_wait(old, 0, HZ) < 0) { > > + GEM_TRACE("%s timed out.\n", engine->name); > > + GEM_TRACE_DUMP(); > > + > > + i915_gem_set_wedged(engine->i915); > > + i915_request_put(old); > > + err = 
-EIO; > > + break; > > + } > > Using err = i915_request_wait() could have saved one extra request_put > but I dunno if it would be any cleaner. It's also -ETIME, which didn't fit my intention. > > > i915_request_put(old); > > } > > + > > + cond_resched(); > > To give more slack for other engines and main thread to proceed? Yes. Otherwise, it spins mighty fine. > > > } > > > > for (count = 0; count < ARRAY_SIZE(rq); count++) > > @@ -1126,6 +1139,10 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) > > > > err = i915_subtests(tests, i915); > > > > + mutex_lock(&i915->drm.struct_mutex); > > + flush_test(i915, I915_WAIT_LOCKED); > > + mutex_unlock(&i915->drm.struct_mutex); > > + > > To wash out leftovers. Yeah, from the early abort we left requests unaccounted for and needed to grab the struct_mutex to run retire. Otherwise we hit assertions later on about trying to unload the driver with requests still inflight. -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v2] drm/i915/selftests: Include the trace as a debug aide 2018-03-22 14:30 ` Chris Wilson @ 2018-03-22 19:29 ` Jeff McGee 2018-03-22 20:37 ` Chris Wilson 0 siblings, 1 reply; 19+ messages in thread From: Jeff McGee @ 2018-03-22 19:29 UTC (permalink / raw) To: Chris Wilson; +Cc: intel-gfx On Thu, Mar 22, 2018 at 02:30:09PM +0000, Chris Wilson wrote: > Quoting Mika Kuoppala (2018-03-22 14:26:41) > > Chris Wilson <chris@chris-wilson.co.uk> writes: > > > > > If we fail to reset the GPU in a timely fashion, dump the GEM trace so > > > that we can see what operations were in flight when the GPU got stuck. > > > > > > v2: There's more than one timeout that deserves tracing! > > > v3: Silence checkpatch by not even using a product at all! > > > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > > --- > > > drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 23 ++++++++++++++++++++--- > > > 1 file changed, 20 insertions(+), 3 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c > > > index 4372826998aa..9b235dae8dd9 100644 > > > --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c > > > +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c > > > @@ -260,8 +260,11 @@ static void wedge_me(struct work_struct *work) > > > { > > > struct wedge_me *w = container_of(work, typeof(*w), work.work); > > > > > > - pr_err("%pS timed out, cancelling all further testing.\n", > > > - w->symbol); > > > + pr_err("%pS timed out, cancelling all further testing.\n", w->symbol); > > > + > > > + GEM_TRACE("%pS timed out.\n", w->symbol); > > > + GEM_TRACE_DUMP(); > > > + > > > i915_gem_set_wedged(w->i915); > > > } > > > > > > @@ -621,9 +624,19 @@ static int active_engine(void *data) > > > mutex_unlock(&engine->i915->drm.struct_mutex); > > > > > > if (old) { > > > - i915_request_wait(old, 0, MAX_SCHEDULE_TIMEOUT); > > > + if (i915_request_wait(old, 0, HZ) < 0) { > > > + GEM_TRACE("%s 
timed out.\n", engine->name); > > > + GEM_TRACE_DUMP(); > > > + > > > + i915_gem_set_wedged(engine->i915); > > > + i915_request_put(old); > > > + err = -EIO; > > > + break; > > > + } > > > > Using err = i915_request_wait() could have saved one extra request_put > > but I dunno if it would be any cleaner. > > It's also -ETIME, which didn't fit my intention. > > > > > > i915_request_put(old); > > > } > > > + > > > + cond_resched(); > > > > To give more slack for other engines and main thread to proceed? > > Yes. Otherwise, it spins mighty fine. > > > > > } > > > > > > for (count = 0; count < ARRAY_SIZE(rq); count++) > > > @@ -1126,6 +1139,10 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) > > > > > > err = i915_subtests(tests, i915); > > > > > > + mutex_lock(&i915->drm.struct_mutex); > > > + flush_test(i915, I915_WAIT_LOCKED); > > > + mutex_unlock(&i915->drm.struct_mutex); > > > + > > > > To wash out leftovers. > > Yeah, from the early abort we left requests unaccounted for and needed > to grab the struct_mutex to run retire. Otherwise we hit assertions > later on about trying to unload the driver with requests still inflight. > -Chris On this and the 3 others in this series... Reviewed-by: Jeff McGee <jeff.mcgee@intel.com> _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v2] drm/i915/selftests: Include the trace as a debug aide 2018-03-22 19:29 ` Jeff McGee @ 2018-03-22 20:37 ` Chris Wilson 0 siblings, 0 replies; 19+ messages in thread From: Chris Wilson @ 2018-03-22 20:37 UTC (permalink / raw) To: Jeff McGee; +Cc: intel-gfx Quoting Jeff McGee (2018-03-22 19:29:16) > On Thu, Mar 22, 2018 at 02:30:09PM +0000, Chris Wilson wrote: > > Quoting Mika Kuoppala (2018-03-22 14:26:41) > > > Chris Wilson <chris@chris-wilson.co.uk> writes: > > > > > > > If we fail to reset the GPU in a timely fashion, dump the GEM trace so > > > > that we can see what operations were in flight when the GPU got stuck. > > > > > > > > v2: There's more than one timeout that deserves tracing! > > > > v3: Silence checkpatch by not even using a product at all! > > > > > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > > > --- > > > > drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 23 ++++++++++++++++++++--- > > > > 1 file changed, 20 insertions(+), 3 deletions(-) > > > > > > > > diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c > > > > index 4372826998aa..9b235dae8dd9 100644 > > > > --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c > > > > +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c > > > > @@ -260,8 +260,11 @@ static void wedge_me(struct work_struct *work) > > > > { > > > > struct wedge_me *w = container_of(work, typeof(*w), work.work); > > > > > > > > - pr_err("%pS timed out, cancelling all further testing.\n", > > > > - w->symbol); > > > > + pr_err("%pS timed out, cancelling all further testing.\n", w->symbol); > > > > + > > > > + GEM_TRACE("%pS timed out.\n", w->symbol); > > > > + GEM_TRACE_DUMP(); > > > > + > > > > i915_gem_set_wedged(w->i915); > > > > } > > > > > > > > @@ -621,9 +624,19 @@ static int active_engine(void *data) > > > > mutex_unlock(&engine->i915->drm.struct_mutex); > > > > > > > > if (old) { > > > > - i915_request_wait(old, 0, 
MAX_SCHEDULE_TIMEOUT); > > > > + if (i915_request_wait(old, 0, HZ) < 0) { > > > > + GEM_TRACE("%s timed out.\n", engine->name); > > > > + GEM_TRACE_DUMP(); > > > > + > > > > + i915_gem_set_wedged(engine->i915); > > > > + i915_request_put(old); > > > > + err = -EIO; > > > > + break; > > > > + } > > > > > > Using err = i915_request_wait() could have saved one extra request_put > > > but I dunno if it would be any cleaner. > > > > It's also -ETIME, which didn't fit my intention. > > > > > > > > > i915_request_put(old); > > > > } > > > > + > > > > + cond_resched(); > > > > > > To give more slack for other engines and main thread to proceed? > > > > Yes. Otherwise, it spins mighty fine. > > > > > > > } > > > > > > > > for (count = 0; count < ARRAY_SIZE(rq); count++) > > > > @@ -1126,6 +1139,10 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) > > > > > > > > err = i915_subtests(tests, i915); > > > > > > > > + mutex_lock(&i915->drm.struct_mutex); > > > > + flush_test(i915, I915_WAIT_LOCKED); > > > > + mutex_unlock(&i915->drm.struct_mutex); > > > > + > > > > > > To wash out leftovers. > > > > Yeah, from the early abort we left requests unaccounted for and needed > > to grab the struct_mutex to run retire. Otherwise we hit assertions > > later on about trying to unload the driver with requests still inflight. > > -Chris > > On this and the 3 others in this series... > > Reviewed-by: Jeff McGee <jeff.mcgee@intel.com> Much appreciated. Pushed, let's hope CI holds up and that we're ready to start talking about the real changes required for forced preemption as opposed to getting the existing code working. -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
* ✓ Fi.CI.BAT: success for series starting with [1/4] drm/i915/selftests: Include the trace as a debug aide 2018-03-22 7:35 [PATCH 1/4] drm/i915/selftests: Include the trace as a debug aide Chris Wilson ` (4 preceding siblings ...) 2018-03-22 7:49 ` [PATCH v2] " Chris Wilson @ 2018-03-22 7:58 ` Patchwork 2018-03-22 8:02 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2] drm/i915/selftests: Include the trace as a debug aide (rev2) Patchwork ` (2 subsequent siblings) 8 siblings, 0 replies; 19+ messages in thread From: Patchwork @ 2018-03-22 7:58 UTC (permalink / raw) To: Chris Wilson; +Cc: intel-gfx == Series Details == Series: series starting with [1/4] drm/i915/selftests: Include the trace as a debug aide URL : https://patchwork.freedesktop.org/series/40439/ State : success == Summary == Series 40439v1 series starting with [1/4] drm/i915/selftests: Include the trace as a debug aide https://patchwork.freedesktop.org/api/1.0/series/40439/revisions/1/mbox/ ---- Known issues: Test debugfs_test: Subgroup read_all_entries: incomplete -> PASS (fi-snb-2520m) fdo#103713 Test kms_flip: Subgroup basic-flip-vs-wf_vblank: fail -> PASS (fi-skl-6770hq) fdo#100368 Test kms_pipe_crc_basic: Subgroup suspend-read-crc-pipe-b: pass -> DMESG-FAIL (fi-cnl-y3) fdo#104951 Subgroup suspend-read-crc-pipe-c: pass -> INCOMPLETE (fi-bxt-dsi) fdo#103927 fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713 fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368 fdo#104951 https://bugs.freedesktop.org/show_bug.cgi?id=104951 fdo#103927 https://bugs.freedesktop.org/show_bug.cgi?id=103927 fi-bdw-5557u total:285 pass:264 dwarn:0 dfail:0 fail:0 skip:21 time:430s fi-bdw-gvtdvm total:285 pass:261 dwarn:0 dfail:0 fail:0 skip:24 time:442s fi-blb-e6850 total:285 pass:220 dwarn:1 dfail:0 fail:0 skip:64 time:379s fi-bsw-n3050 total:285 pass:239 dwarn:0 dfail:0 fail:0 skip:46 time:536s fi-bwr-2160 total:285 pass:180 dwarn:0 dfail:0 fail:0 skip:105 time:297s fi-bxt-dsi total:243 
pass:216 dwarn:0 dfail:0 fail:0 skip:26 fi-bxt-j4205 total:285 pass:256 dwarn:0 dfail:0 fail:0 skip:29 time:511s fi-byt-j1900 total:285 pass:250 dwarn:0 dfail:0 fail:0 skip:35 time:513s fi-byt-n2820 total:285 pass:246 dwarn:0 dfail:0 fail:0 skip:39 time:503s fi-cfl-8700k total:285 pass:257 dwarn:0 dfail:0 fail:0 skip:28 time:416s fi-cfl-s2 total:285 pass:259 dwarn:0 dfail:0 fail:0 skip:26 time:567s fi-cnl-drrs total:285 pass:254 dwarn:3 dfail:0 fail:0 skip:28 time:527s fi-cnl-y3 total:285 pass:258 dwarn:0 dfail:1 fail:0 skip:26 time:587s fi-elk-e7500 total:285 pass:225 dwarn:1 dfail:0 fail:0 skip:59 time:427s fi-gdg-551 total:285 pass:176 dwarn:0 dfail:0 fail:1 skip:108 time:320s fi-glk-1 total:285 pass:257 dwarn:0 dfail:0 fail:0 skip:28 time:531s fi-hsw-4770 total:285 pass:258 dwarn:0 dfail:0 fail:0 skip:27 time:404s fi-ilk-650 total:285 pass:225 dwarn:0 dfail:0 fail:0 skip:60 time:419s fi-ivb-3520m total:285 pass:256 dwarn:0 dfail:0 fail:0 skip:29 time:469s fi-ivb-3770 total:285 pass:252 dwarn:0 dfail:0 fail:0 skip:33 time:434s fi-kbl-7500u total:285 pass:260 dwarn:1 dfail:0 fail:0 skip:24 time:475s fi-kbl-7567u total:285 pass:265 dwarn:0 dfail:0 fail:0 skip:20 time:464s fi-kbl-r total:285 pass:258 dwarn:0 dfail:0 fail:0 skip:27 time:513s fi-pnv-d510 total:285 pass:219 dwarn:1 dfail:0 fail:0 skip:65 time:653s fi-skl-6260u total:285 pass:265 dwarn:0 dfail:0 fail:0 skip:20 time:437s fi-skl-6600u total:285 pass:258 dwarn:0 dfail:0 fail:0 skip:27 time:535s fi-skl-6700k2 total:285 pass:261 dwarn:0 dfail:0 fail:0 skip:24 time:504s fi-skl-6770hq total:285 pass:265 dwarn:0 dfail:0 fail:0 skip:20 time:512s fi-skl-guc total:285 pass:257 dwarn:0 dfail:0 fail:0 skip:28 time:427s fi-skl-gvtdvm total:285 pass:262 dwarn:0 dfail:0 fail:0 skip:23 time:444s fi-snb-2520m total:285 pass:245 dwarn:0 dfail:0 fail:0 skip:40 time:563s fi-snb-2600 total:285 pass:245 dwarn:0 dfail:0 fail:0 skip:40 time:403s fi-cfl-u failed to connect after reboot dff9ece60048108782aab6d6123822c1d34b0e5a 
drm-tip: 2018y-03m-21d-20h-44m-14s UTC integration manifest a41114e3c858 drm/i915: Flush pending interrupt following a GPU reset 10e13a64240a drm/i915: Use full serialisation around engine->irq_posted aae1777597a8 drm/i915/selftests: Stress resets-vs-request-priority 6adaf914c971 drm/i915/selftests: Include the trace as a debug aide == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8443/issues.html _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
* ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2] drm/i915/selftests: Include the trace as a debug aide (rev2) 2018-03-22 7:35 [PATCH 1/4] drm/i915/selftests: Include the trace as a debug aide Chris Wilson ` (5 preceding siblings ...) 2018-03-22 7:58 ` ✓ Fi.CI.BAT: success for series starting with [1/4] " Patchwork @ 2018-03-22 8:02 ` Patchwork 2018-03-22 8:19 ` ✓ Fi.CI.BAT: success " Patchwork 2018-03-22 10:23 ` ✓ Fi.CI.IGT: " Patchwork 8 siblings, 0 replies; 19+ messages in thread From: Patchwork @ 2018-03-22 8:02 UTC (permalink / raw) To: Chris Wilson; +Cc: intel-gfx == Series Details == Series: series starting with [v2] drm/i915/selftests: Include the trace as a debug aide (rev2) URL : https://patchwork.freedesktop.org/series/40439/ State : warning == Summary == $ dim checkpatch origin/drm-tip d4b28d06e3b8 drm/i915/selftests: Include the trace as a debug aide 59301a5a5c45 drm/i915/selftests: Stress resets-vs-request-priority 4eb53ac79094 drm/i915: Use full serialisation around engine->irq_posted cf4fae2b05ff drm/i915: Flush pending interrupt following a GPU reset -:23: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line) #23: 2d..1 57964617us : __i915_request_unsubmit: rcs0 fence 29:1056 <- global_seqno 1060 total: 0 errors, 1 warnings, 0 checks, 30 lines checked _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
* ✓ Fi.CI.BAT: success for series starting with [v2] drm/i915/selftests: Include the trace as a debug aide (rev2) 2018-03-22 7:35 [PATCH 1/4] drm/i915/selftests: Include the trace as a debug aide Chris Wilson ` (6 preceding siblings ...) 2018-03-22 8:02 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2] drm/i915/selftests: Include the trace as a debug aide (rev2) Patchwork @ 2018-03-22 8:19 ` Patchwork 2018-03-22 10:23 ` ✓ Fi.CI.IGT: " Patchwork 8 siblings, 0 replies; 19+ messages in thread From: Patchwork @ 2018-03-22 8:19 UTC (permalink / raw) To: Chris Wilson; +Cc: intel-gfx == Series Details == Series: series starting with [v2] drm/i915/selftests: Include the trace as a debug aide (rev2) URL : https://patchwork.freedesktop.org/series/40439/ State : success == Summary == Series 40439v2 series starting with [v2] drm/i915/selftests: Include the trace as a debug aide https://patchwork.freedesktop.org/api/1.0/series/40439/revisions/2/mbox/ ---- Known issues: Test debugfs_test: Subgroup read_all_entries: incomplete -> PASS (fi-snb-2520m) fdo#103713 +1 Test gem_exec_suspend: Subgroup basic-s3: pass -> INCOMPLETE (fi-skl-6260u) fdo#104108 Test kms_flip: Subgroup basic-flip-vs-wf_vblank: fail -> PASS (fi-skl-6770hq) fdo#100368 fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713 fdo#104108 https://bugs.freedesktop.org/show_bug.cgi?id=104108 fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368 fi-bdw-5557u total:285 pass:264 dwarn:0 dfail:0 fail:0 skip:21 time:433s fi-bdw-gvtdvm total:285 pass:261 dwarn:0 dfail:0 fail:0 skip:24 time:444s fi-blb-e6850 total:285 pass:220 dwarn:1 dfail:0 fail:0 skip:64 time:380s fi-bsw-n3050 total:285 pass:239 dwarn:0 dfail:0 fail:0 skip:46 time:535s fi-bwr-2160 total:285 pass:180 dwarn:0 dfail:0 fail:0 skip:105 time:297s fi-bxt-dsi total:285 pass:255 dwarn:0 dfail:0 fail:0 skip:30 time:512s fi-bxt-j4205 total:285 pass:256 dwarn:0 dfail:0 fail:0 skip:29 time:513s fi-byt-j1900 total:285 pass:250 dwarn:0 dfail:0 
fail:0 skip:35 time:516s fi-byt-n2820 total:285 pass:246 dwarn:0 dfail:0 fail:0 skip:39 time:503s fi-cfl-8700k total:285 pass:257 dwarn:0 dfail:0 fail:0 skip:28 time:409s fi-cfl-s2 total:285 pass:259 dwarn:0 dfail:0 fail:0 skip:26 time:564s fi-cnl-drrs total:285 pass:254 dwarn:3 dfail:0 fail:0 skip:28 time:535s fi-cnl-y3 total:285 pass:259 dwarn:0 dfail:0 fail:0 skip:26 time:588s fi-elk-e7500 total:285 pass:225 dwarn:1 dfail:0 fail:0 skip:59 time:429s fi-gdg-551 total:285 pass:176 dwarn:0 dfail:0 fail:1 skip:108 time:320s fi-glk-1 total:285 pass:257 dwarn:0 dfail:0 fail:0 skip:28 time:536s fi-hsw-4770 total:285 pass:258 dwarn:0 dfail:0 fail:0 skip:27 time:401s fi-ilk-650 total:285 pass:225 dwarn:0 dfail:0 fail:0 skip:60 time:425s fi-ivb-3520m total:285 pass:256 dwarn:0 dfail:0 fail:0 skip:29 time:472s fi-ivb-3770 total:285 pass:252 dwarn:0 dfail:0 fail:0 skip:33 time:434s fi-kbl-7500u total:285 pass:260 dwarn:1 dfail:0 fail:0 skip:24 time:480s fi-kbl-7567u total:285 pass:265 dwarn:0 dfail:0 fail:0 skip:20 time:466s fi-kbl-r total:285 pass:258 dwarn:0 dfail:0 fail:0 skip:27 time:513s fi-pnv-d510 total:285 pass:219 dwarn:1 dfail:0 fail:0 skip:65 time:652s fi-skl-6260u total:108 pass:104 dwarn:0 dfail:0 fail:0 skip:3 fi-skl-6600u total:285 pass:258 dwarn:0 dfail:0 fail:0 skip:27 time:529s fi-skl-6700k2 total:285 pass:261 dwarn:0 dfail:0 fail:0 skip:24 time:495s fi-skl-6770hq total:285 pass:265 dwarn:0 dfail:0 fail:0 skip:20 time:500s fi-skl-guc total:285 pass:257 dwarn:0 dfail:0 fail:0 skip:28 time:438s fi-skl-gvtdvm total:285 pass:262 dwarn:0 dfail:0 fail:0 skip:23 time:444s fi-snb-2520m total:242 pass:208 dwarn:0 dfail:0 fail:0 skip:33 fi-snb-2600 total:285 pass:245 dwarn:0 dfail:0 fail:0 skip:40 time:398s dff9ece60048108782aab6d6123822c1d34b0e5a drm-tip: 2018y-03m-21d-20h-44m-14s UTC integration manifest cf4fae2b05ff drm/i915: Flush pending interrupt following a GPU reset 4eb53ac79094 drm/i915: Use full serialisation around engine->irq_posted 59301a5a5c45 
drm/i915/selftests: Stress resets-vs-request-priority d4b28d06e3b8 drm/i915/selftests: Include the trace as a debug aide == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8444/issues.html _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
* ✓ Fi.CI.IGT: success for series starting with [v2] drm/i915/selftests: Include the trace as a debug aide (rev2) 2018-03-22 7:35 [PATCH 1/4] drm/i915/selftests: Include the trace as a debug aide Chris Wilson ` (7 preceding siblings ...) 2018-03-22 8:19 ` ✓ Fi.CI.BAT: success " Patchwork @ 2018-03-22 10:23 ` Patchwork 8 siblings, 0 replies; 19+ messages in thread From: Patchwork @ 2018-03-22 10:23 UTC (permalink / raw) To: Chris Wilson; +Cc: intel-gfx == Series Details == Series: series starting with [v2] drm/i915/selftests: Include the trace as a debug aide (rev2) URL : https://patchwork.freedesktop.org/series/40439/ State : success == Summary == ---- Known issues: Test kms_cursor_legacy: Subgroup flip-vs-cursor-atomic: pass -> FAIL (shard-hsw) fdo#102670 Test kms_flip: Subgroup flip-vs-wf_vblank-interruptible: fail -> PASS (shard-hsw) fdo#100368 +1 Test kms_pipe_crc_basic: Subgroup read-crc-pipe-c-frame-sequence: pass -> FAIL (shard-apl) fdo#103481 Test kms_setmode: Subgroup basic: pass -> FAIL (shard-apl) fdo#99912 Test kms_vblank: Subgroup pipe-a-ts-continuation-suspend: pass -> INCOMPLETE (shard-hsw) fdo#103540 fdo#102670 https://bugs.freedesktop.org/show_bug.cgi?id=102670 fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368 fdo#103481 https://bugs.freedesktop.org/show_bug.cgi?id=103481 fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912 fdo#103540 https://bugs.freedesktop.org/show_bug.cgi?id=103540 shard-apl total:3478 pass:1814 dwarn:1 dfail:0 fail:8 skip:1655 time:13097s shard-hsw total:3391 pass:1726 dwarn:1 dfail:0 fail:3 skip:1659 time:11609s shard-snb total:3478 pass:1357 dwarn:1 dfail:0 fail:3 skip:2117 time:7279s Blacklisted hosts: shard-kbl total:3454 pass:1924 dwarn:1 dfail:0 fail:9 skip:1519 time:9758s == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8444/shards.html _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 19+ messages in thread
end of thread, other threads:[~2018-03-31 8:59 UTC | newest] Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2018-03-22 7:35 [PATCH 1/4] drm/i915/selftests: Include the trace as a debug aide Chris Wilson 2018-03-22 7:35 ` [PATCH 2/4] drm/i915/selftests: Stress resets-vs-request-priority Chris Wilson 2018-03-22 7:35 ` [PATCH 3/4] drm/i915: Use full serialisation around engine->irq_posted Chris Wilson 2018-03-22 14:35 ` Mika Kuoppala 2018-03-22 15:34 ` Jeff McGee 2018-03-22 17:01 ` Chris Wilson 2018-03-30 23:08 ` Chris Wilson 2018-03-31 8:59 ` Chris Wilson 2018-03-22 7:35 ` [PATCH 4/4] drm/i915: Flush pending interrupt following a GPU reset Chris Wilson 2018-03-22 7:43 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/4] drm/i915/selftests: Include the trace as a debug aide Patchwork 2018-03-22 7:49 ` [PATCH v2] " Chris Wilson 2018-03-22 14:26 ` Mika Kuoppala 2018-03-22 14:30 ` Chris Wilson 2018-03-22 19:29 ` Jeff McGee 2018-03-22 20:37 ` Chris Wilson 2018-03-22 7:58 ` ✓ Fi.CI.BAT: success for series starting with [1/4] " Patchwork 2018-03-22 8:02 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2] drm/i915/selftests: Include the trace as a debug aide (rev2) Patchwork 2018-03-22 8:19 ` ✓ Fi.CI.BAT: success " Patchwork 2018-03-22 10:23 ` ✓ Fi.CI.IGT: " Patchwork
This is an external index of several public inboxes; see mirroring instructions on how to clone and mirror all data and code used by this external index.