All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/2] drm/i915: signal first fence from irq handler if complete
@ 2017-02-23 15:42 Chris Wilson
  2017-02-23 15:42 ` [PATCH v2 2/2] drm/i915: Delay disabling the user interrupt for breadcrumbs Chris Wilson
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Chris Wilson @ 2017-02-23 15:42 UTC (permalink / raw)
  To: intel-gfx

As execlists and other non-semaphore multi-engine devices coordinate
between engines using interrupts, we can shave off a few 10s of
microsecond of scheduling latency by doing the fence signaling from the
interrupt as opposed to a RT kthread. (Realistically the delay adds
about 1% to an individual cross-engine workload.) We only signal the
first fence in order to limit the amount of work we move into the
interrupt handler. We also have to remember that our breadcrumbs may be
unordered with respect to the interrupt and so we still require the
waiter process to perform some heavyweight coherency fixups, as well as
traversing the tree of waiters.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          | 13 ++++++------
 drivers/gpu/drm/i915/i915_gem_request.c  |  2 +-
 drivers/gpu/drm/i915/i915_gem_request.h  |  2 ++
 drivers/gpu/drm/i915/i915_irq.c          | 35 +++++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 17 ++++++----------
 drivers/gpu/drm/i915/intel_ringbuffer.h  | 33 +++++++++++-------------------
 6 files changed, 59 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index eed9ead1b592..07a69e1ff1c3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -4065,9 +4065,9 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req)
 	 * is woken.
 	 */
 	if (engine->irq_seqno_barrier &&
-	    rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh) == current &&
 	    test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) {
-		struct task_struct *tsk;
+		struct intel_breadcrumbs *b = &engine->breadcrumbs;
+		unsigned long flags;
 
 		/* The ordering of irq_posted versus applying the barrier
 		 * is crucial. The clearing of the current irq_posted must
@@ -4089,17 +4089,16 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req)
 		 * the seqno before we believe it coherent since they see
 		 * irq_posted == false but we are still running).
 		 */
-		rcu_read_lock();
-		tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh);
-		if (tsk && tsk != current)
+		spin_lock_irqsave(&b->lock, flags);
+		if (b->first_wait && b->first_wait->tsk != current)
 			/* Note that if the bottom-half is changed as we
 			 * are sending the wake-up, the new bottom-half will
 			 * be woken by whomever made the change. We only have
 			 * to worry about when we steal the irq-posted for
 			 * ourself.
 			 */
-			wake_up_process(tsk);
-		rcu_read_unlock();
+			wake_up_process(b->first_wait->tsk);
+		spin_unlock_irqrestore(&b->lock, flags);
 
 		if (__i915_gem_request_completed(req, seqno))
 			return true;
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index fbfeb5f8d069..77c3ee7a3fd0 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1083,7 +1083,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	if (flags & I915_WAIT_LOCKED)
 		add_wait_queue(errq, &reset);
 
-	intel_wait_init(&wait);
+	intel_wait_init(&wait, req);
 
 restart:
 	do {
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 5f73d8c0a38a..0efee879df23 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -32,10 +32,12 @@
 
 struct drm_file;
 struct drm_i915_gem_object;
+struct drm_i915_gem_request;
 
 struct intel_wait {
 	struct rb_node node;
 	struct task_struct *tsk;
+	struct drm_i915_gem_request *request;
 	u32 seqno;
 };
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index bc70e2c451b2..3c79753edf0e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1033,12 +1033,41 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
 
 static void notify_ring(struct intel_engine_cs *engine)
 {
-	bool waiters;
+	struct drm_i915_gem_request *rq = NULL;
+	struct intel_wait *wait;
+
+	if (!intel_engine_has_waiter(engine))
+		return;
 
 	atomic_inc(&engine->irq_count);
 	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
-	waiters = intel_engine_wakeup(engine);
-	trace_intel_engine_notify(engine, waiters);
+
+	spin_lock(&engine->breadcrumbs.lock);
+	wait = engine->breadcrumbs.first_wait;
+	if (wait) {
+		/* We use a callback from the dma-fence to submit
+		 * requests after waiting on our own requests. To
+		 * ensure minimum delay in queuing the next request to
+		 * hardware, signal the fence now rather than wait for
+		 * the signaler to be woken up. We still wake up the
+		 * waiter in order to handle the irq-seqno coherency
+		 * issues (we may receive the interrupt before the
+		 * seqno is written, see __i915_request_irq_complete())
+		 * and to handle coalescing of multiple seqno updates
+		 * and many waiters.
+		 */
+		if (i915_seqno_passed(intel_engine_get_seqno(engine),
+				      wait->seqno))
+			rq = wait->request;
+
+		wake_up_process(wait->tsk);
+	}
+	spin_unlock(&engine->breadcrumbs.lock);
+
+	if (rq) /* rq is protected by RCU, so safe from hard-irq context */
+		dma_fence_signal(&rq->fence);
+
+	trace_intel_engine_notify(engine, wait);
 }
 
 static void vlv_c0_read(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 027c93e34c97..5f2665aa817c 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -81,8 +81,11 @@ static void intel_breadcrumbs_fake_irq(unsigned long data)
 	 * every jiffie in order to kick the oldest waiter to do the
 	 * coherent seqno check.
 	 */
-	if (intel_engine_wakeup(engine))
-		mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
+	if (!intel_engine_has_waiter(engine))
+		return;
+
+	intel_engine_wakeup(engine);
+	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
 }
 
 static void irq_enable(struct intel_engine_cs *engine)
@@ -275,7 +278,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
 	}
 	rb_link_node(&wait->node, parent, p);
 	rb_insert_color(&wait->node, &b->waiters);
-	GEM_BUG_ON(!first && !rcu_access_pointer(b->irq_seqno_bh));
 
 	if (completed) {
 		struct rb_node *next = rb_next(completed);
@@ -284,7 +286,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
 		if (next && next != &wait->node) {
 			GEM_BUG_ON(first);
 			b->first_wait = to_wait(next);
-			rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
 			/* As there is a delay between reading the current
 			 * seqno, processing the completed tasks and selecting
 			 * the next waiter, we may have missed the interrupt
@@ -311,7 +312,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
 	if (first) {
 		GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
 		b->first_wait = wait;
-		rcu_assign_pointer(b->irq_seqno_bh, wait->tsk);
 		/* After assigning ourselves as the new bottom-half, we must
 		 * perform a cursory check to prevent a missed interrupt.
 		 * Either we miss the interrupt whilst programming the hardware,
@@ -322,7 +322,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
 		 */
 		__intel_breadcrumbs_enable_irq(b);
 	}
-	GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh));
 	GEM_BUG_ON(!b->first_wait);
 	GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);
 
@@ -370,8 +369,6 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
 		const int priority = wakeup_priority(b, wait->tsk);
 		struct rb_node *next;
 
-		GEM_BUG_ON(rcu_access_pointer(b->irq_seqno_bh) != wait->tsk);
-
 		/* We are the current bottom-half. Find the next candidate,
 		 * the first waiter in the queue on the remaining oldest
 		 * request. As multiple seqnos may complete in the time it
@@ -413,13 +410,11 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
 			 * exception rather than a seqno completion.
 			 */
 			b->first_wait = to_wait(next);
-			rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
 			if (b->first_wait->seqno != wait->seqno)
 				__intel_breadcrumbs_enable_irq(b);
 			wake_up_process(b->first_wait->tsk);
 		} else {
 			b->first_wait = NULL;
-			rcu_assign_pointer(b->irq_seqno_bh, NULL);
 			__intel_breadcrumbs_disable_irq(b);
 		}
 	} else {
@@ -433,7 +428,6 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
 	GEM_BUG_ON(b->first_wait == wait);
 	GEM_BUG_ON(rb_first(&b->waiters) !=
 		   (b->first_wait ? &b->first_wait->node : NULL));
-	GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters));
 }
 
 void intel_engine_remove_wait(struct intel_engine_cs *engine,
@@ -595,6 +589,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 		return;
 
 	request->signaling.wait.tsk = b->signaler;
+	request->signaling.wait.request = request;
 	request->signaling.wait.seqno = seqno;
 	i915_gem_request_get(request);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 0f29e07a9581..c90b60f47f25 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -235,8 +235,6 @@ struct intel_engine_cs {
 	 * the overhead of waking that client is much preferred.
 	 */
 	struct intel_breadcrumbs {
-		struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */
-
 		spinlock_t lock; /* protects the lists of requests; irqsafe */
 		struct rb_root waiters; /* sorted by retirement, priority */
 		struct rb_root signals; /* sorted by retirement */
@@ -582,9 +580,11 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
 /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
 
-static inline void intel_wait_init(struct intel_wait *wait)
+static inline void intel_wait_init(struct intel_wait *wait,
+				   struct drm_i915_gem_request *rq)
 {
 	wait->tsk = current;
+	wait->request = rq;
 }
 
 static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
@@ -639,29 +639,20 @@ void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);
 
 static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
 {
-	return rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh);
+	return READ_ONCE(engine->breadcrumbs.first_wait);
 }
 
-static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine)
+static inline bool intel_engine_wakeup(struct intel_engine_cs *engine)
 {
+	struct intel_wait *wait;
+	unsigned long flags;
 	bool wakeup = false;
 
-	/* Note that for this not to dangerously chase a dangling pointer,
-	 * we must hold the rcu_read_lock here.
-	 *
-	 * Also note that tsk is likely to be in !TASK_RUNNING state so an
-	 * early test for tsk->state != TASK_RUNNING before wake_up_process()
-	 * is unlikely to be beneficial.
-	 */
-	if (intel_engine_has_waiter(engine)) {
-		struct task_struct *tsk;
-
-		rcu_read_lock();
-		tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh);
-		if (tsk)
-			wakeup = wake_up_process(tsk);
-		rcu_read_unlock();
-	}
+	spin_lock_irqsave(&engine->breadcrumbs.lock, flags);
+	wait = engine->breadcrumbs.first_wait;
+	if (wait)
+		wakeup = wake_up_process(wait->tsk);
+	spin_unlock_irqrestore(&engine->breadcrumbs.lock, flags);
 
 	return wakeup;
 }
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2 2/2] drm/i915: Delay disabling the user interrupt for breadcrumbs
  2017-02-23 15:42 [PATCH v2 1/2] drm/i915: signal first fence from irq handler if complete Chris Wilson
@ 2017-02-23 15:42 ` Chris Wilson
  2017-02-24 10:34   ` Tvrtko Ursulin
  2017-02-23 17:23 ` ✗ Fi.CI.BAT: failure for series starting with [v2,1/2] drm/i915: signal first fence from irq handler if complete Patchwork
  2017-02-24 10:04 ` [PATCH v2 1/2] " Tvrtko Ursulin
  2 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2017-02-23 15:42 UTC (permalink / raw)
  To: intel-gfx

A significant cost in setting up a wait is the overhead of enabling the
interrupt. As we disable the interrupt whenever the queue of waiters is
empty, if we are frequently waiting on alternating batches, we end up
re-enabling the interrupt on a frequent basis. We do want to disable the
interrupt during normal operations as under high load it may add several
thousand interrupts/s - we have been known in the past to occupy whole
cores with our interrupt handler after accidentally leaving user
interrupts enabled. As a compromise, leave the interrupt enabled until
the next IRQ, or the system is idle. This gives a small window for a
waiter to keep the interrupt active and not be delayed by having to
re-enable the interrupt.

v2: Restore hangcheck/missed-irq detection for continuations

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c          |  4 +-
 drivers/gpu/drm/i915/i915_irq.c          |  5 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 95 +++++++++++++++++++-------------
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  4 +-
 4 files changed, 65 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a8167003c10b..3ec8c948fe45 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2988,8 +2988,10 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	if (wait_for(intel_execlists_idle(dev_priv), 10))
 		DRM_ERROR("Timeout waiting for engines to idle\n");
 
-	for_each_engine(engine, dev_priv, id)
+	for_each_engine(engine, dev_priv, id) {
+		intel_engine_disarm_breadcrumbs(engine);
 		i915_gem_batch_pool_fini(&engine->batch_pool);
+	}
 
 	GEM_BUG_ON(!dev_priv->gt.awake);
 	dev_priv->gt.awake = false;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 3c79753edf0e..ba0bb33e12ed 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1036,9 +1036,6 @@ static void notify_ring(struct intel_engine_cs *engine)
 	struct drm_i915_gem_request *rq = NULL;
 	struct intel_wait *wait;
 
-	if (!intel_engine_has_waiter(engine))
-		return;
-
 	atomic_inc(&engine->irq_count);
 	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 
@@ -1061,6 +1058,8 @@ static void notify_ring(struct intel_engine_cs *engine)
 			rq = wait->request;
 
 		wake_up_process(wait->tsk);
+	} else {
+		__intel_engine_disarm_breadcrumbs(engine);
 	}
 	spin_unlock(&engine->breadcrumbs.lock);
 
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 5f2665aa817c..bf14022f6adb 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -35,8 +35,10 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
 {
 	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	struct intel_wait *wait;
+	unsigned long flags;
 
-	if (!b->irq_enabled)
+	if (!b->irq_armed)
 		return;
 
 	if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) {
@@ -49,10 +51,15 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
 	 * to process the pending interrupt (e.g, low priority task on a loaded
 	 * system) and wait until it sleeps before declaring a missed interrupt.
 	 */
-	if (!intel_engine_wakeup(engine)) {
+	spin_lock_irqsave(&b->lock, flags);
+	wait = b->first_wait;
+	if (!wait || !wake_up_process(wait->tsk)) {
 		mod_timer(&b->hangcheck, wait_timeout());
-		return;
+		wait = NULL;
 	}
+	spin_unlock_irqrestore(&b->lock, flags);
+	if (!wait)
+		return;
 
 	DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name);
 	set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
@@ -110,6 +117,34 @@ static void irq_disable(struct intel_engine_cs *engine)
 	spin_unlock(&engine->i915->irq_lock);
 }
 
+void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
+{
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+	assert_spin_locked(&b->lock);
+
+	if (b->irq_enabled) {
+		irq_disable(engine);
+		b->irq_enabled = false;
+	}
+
+	b->irq_armed = false;
+}
+
+void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
+{
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	unsigned long flags;
+
+	if (!b->irq_armed)
+		return;
+
+	spin_lock_irqsave(&b->lock, flags);
+	if (!intel_engine_has_waiter(engine))
+		__intel_engine_disarm_breadcrumbs(engine);
+	spin_unlock_irqrestore(&b->lock, flags);
+}
+
 static bool use_fake_irq(const struct intel_breadcrumbs *b)
 {
 	const struct intel_engine_cs *engine =
@@ -134,9 +169,17 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
 	struct drm_i915_private *i915 = engine->i915;
 
 	assert_spin_locked(&b->lock);
-	if (b->rpm_wakelock)
+	if (b->irq_armed)
 		return;
 
+	/* The breadcrumb irq will be disarmed on the interrupt after the
+	 * waiters are signaled. This gives us a single interrupt window in
+	 * which we can add a new waiter and avoid the cost of re-enabling
+	 * the irq.
+	 */
+	b->irq_armed = true;
+	GEM_BUG_ON(b->irq_enabled);
+
 	if (I915_SELFTEST_ONLY(b->mock)) {
 		/* For our mock objects we want to avoid interaction
 		 * with the real hardware (which is not set up). So
@@ -145,17 +188,15 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
 		 * implementation to call intel_engine_wakeup()
 		 * itself when it wants to simulate a user interrupt,
 		 */
-		b->rpm_wakelock = true;
 		return;
 	}
 
 	/* Since we are waiting on a request, the GPU should be busy
-	 * and should have its own rpm reference. For completeness,
-	 * record an rpm reference for ourselves to cover the
-	 * interrupt we unmask.
+	 * and should have its own rpm reference. This is tracked
+	 * by i915->gt.awake, we can forgo holding our own wakref
+	 * for the interrupt as before i915->gt.awake is released (when
+	 * the driver is idle) we disarm the breadcrumbs.
 	 */
-	intel_runtime_pm_get_noresume(i915);
-	b->rpm_wakelock = true;
 
 	/* No interrupts? Kick the waiter every jiffie! */
 	if (intel_irqs_enabled(i915)) {
@@ -173,29 +214,6 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
 	}
 }
 
-static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
-{
-	struct intel_engine_cs *engine =
-		container_of(b, struct intel_engine_cs, breadcrumbs);
-
-	assert_spin_locked(&b->lock);
-	if (!b->rpm_wakelock)
-		return;
-
-	if (I915_SELFTEST_ONLY(b->mock)) {
-		b->rpm_wakelock = false;
-		return;
-	}
-
-	if (b->irq_enabled) {
-		irq_disable(engine);
-		b->irq_enabled = false;
-	}
-
-	intel_runtime_pm_put(engine->i915);
-	b->rpm_wakelock = false;
-}
-
 static inline struct intel_wait *to_wait(struct rb_node *node)
 {
 	return rb_entry(node, struct intel_wait, node);
@@ -415,7 +433,6 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
 			wake_up_process(b->first_wait->tsk);
 		} else {
 			b->first_wait = NULL;
-			__intel_breadcrumbs_disable_irq(b);
 		}
 	} else {
 		GEM_BUG_ON(rb_first(&b->waiters) == &wait->node);
@@ -707,14 +724,16 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
 	cancel_fake_irq(engine);
 	spin_lock_irq(&b->lock);
 
-	__intel_breadcrumbs_disable_irq(b);
+	if (b->irq_enabled)
+		irq_enable(engine);
+	else
+		irq_disable(engine);
+
 	if (intel_engine_has_waiter(engine)) {
-		__intel_breadcrumbs_enable_irq(b);
 		if (test_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted))
 			wake_up_process(b->first_wait->tsk);
-	} else {
+		mod_timer(&b->hangcheck, wait_timeout());
 		/* sanitize the IMR and unmask any auxiliary interrupts */
-		irq_disable(engine);
 	}
 
 	spin_unlock_irq(&b->lock);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c90b60f47f25..9f89160c7de9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -246,8 +246,8 @@ struct intel_engine_cs {
 
 		unsigned int hangcheck_interrupts;
 
+		bool irq_armed : 1;
 		bool irq_enabled : 1;
-		bool rpm_wakelock : 1;
 		I915_SELFTEST_DECLARE(bool mock : 1);
 	} breadcrumbs;
 
@@ -657,6 +657,8 @@ static inline bool intel_engine_wakeup(struct intel_engine_cs *engine)
 	return wakeup;
 }
 
+void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
+void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 bool intel_breadcrumbs_busy(struct intel_engine_cs *engine);
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [v2,1/2] drm/i915: signal first fence from irq handler if complete
  2017-02-23 15:42 [PATCH v2 1/2] drm/i915: signal first fence from irq handler if complete Chris Wilson
  2017-02-23 15:42 ` [PATCH v2 2/2] drm/i915: Delay disabling the user interrupt for breadcrumbs Chris Wilson
@ 2017-02-23 17:23 ` Patchwork
  2017-02-24 10:04 ` [PATCH v2 1/2] " Tvrtko Ursulin
  2 siblings, 0 replies; 10+ messages in thread
From: Patchwork @ 2017-02-23 17:23 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2,1/2] drm/i915: signal first fence from irq handler if complete
URL   : https://patchwork.freedesktop.org/series/20153/
State : failure

== Summary ==

Series 20153v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/20153/revisions/1/mbox/

Test gem_ringfill:
        Subgroup basic-default-hang:
                pass       -> TIMEOUT    (fi-snb-2520m)
                pass       -> TIMEOUT    (fi-hsw-4770)
Test gem_wait:
        Subgroup basic-await-all:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup basic-wait-all:
                pass       -> INCOMPLETE (fi-snb-2520m)
Test gem_workarounds:
        Subgroup basic-read:
                pass       -> INCOMPLETE (fi-snb-2520m)
Test kms_addfb_basic:
        Subgroup addfb25-bad-modifier:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup addfb25-framebuffer-vs-set-tiling:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup addfb25-modifier-no-flag:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup addfb25-x-tiled:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup addfb25-x-tiled-mismatch:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup addfb25-y-tiled:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup addfb25-y-tiled-small:
                skip       -> INCOMPLETE (fi-snb-2520m)
        Subgroup addfb25-yf-tiled:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup bad-pitch-0:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup bad-pitch-1024:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup bad-pitch-128:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup bad-pitch-256:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup bad-pitch-32:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup bad-pitch-63:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup bad-pitch-65536:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup bad-pitch-999:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup basic:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup basic-x-tiled:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup basic-y-tiled:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup bo-too-small:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup bo-too-small-due-to-tiling:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup clobberred-modifier:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup framebuffer-vs-set-tiling:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup invalid-get-prop:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup invalid-get-prop-any:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup invalid-set-prop:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup invalid-set-prop-any:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup no-handle:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup size-max:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup small-bo:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup tile-pitch-mismatch:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup too-high:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup too-wide:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup unused-handle:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup unused-modifier:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup unused-offsets:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup unused-pitches:
                pass       -> INCOMPLETE (fi-snb-2520m)
Test kms_busy:
        Subgroup basic-flip-default-a:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup basic-flip-default-b:
                pass       -> INCOMPLETE (fi-snb-2520m)
        Subgroup basic-flip-default-c:
                skip       -> INCOMPLETE (fi-snb-2520m)
Test kms_cursor_legacy:
        Subgroup basic-busy-flip-before-cursor-atomic:
                pass       -> INCOMPLETE (fi-snb-2520m)
WARNING: Long output truncated

53a95c930ad728b11cc2b21e42b4bd9dcd306400 drm-tip: 2017y-02m-23d-16h-19m-22s UTC integration manifest
9441d33 drm/i915: Delay disabling the user interrupt for breadcrumbs
06d383a drm/i915: signal first fence from irq handler if complete

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3949/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] drm/i915: signal first fence from irq handler if complete
  2017-02-23 15:42 [PATCH v2 1/2] drm/i915: signal first fence from irq handler if complete Chris Wilson
  2017-02-23 15:42 ` [PATCH v2 2/2] drm/i915: Delay disabling the user interrupt for breadcrumbs Chris Wilson
  2017-02-23 17:23 ` ✗ Fi.CI.BAT: failure for series starting with [v2,1/2] drm/i915: signal first fence from irq handler if complete Patchwork
@ 2017-02-24 10:04 ` Tvrtko Ursulin
  2017-02-24 10:19   ` Chris Wilson
  2 siblings, 1 reply; 10+ messages in thread
From: Tvrtko Ursulin @ 2017-02-24 10:04 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 23/02/2017 15:42, Chris Wilson wrote:
> As execlists and other non-semaphore multi-engine devices coordinate
> between engines using interrupts, we can shave off a few 10s of
> microsecond of scheduling latency by doing the fence signaling from the
> interrupt as opposed to a RT kthread. (Realistically the delay adds
> about 1% to an individual cross-engine workload.) We only signal the
> first fence in order to limit the amount of work we move into the
> interrupt handler. We also have to remember that our breadcrumbs may be
> unordered with respect to the interrupt and so we still require the
> waiter process to perform some heavyweight coherency fixups, as well as
> traversing the tree of waiters.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h          | 13 ++++++------
>  drivers/gpu/drm/i915/i915_gem_request.c  |  2 +-
>  drivers/gpu/drm/i915/i915_gem_request.h  |  2 ++
>  drivers/gpu/drm/i915/i915_irq.c          | 35 +++++++++++++++++++++++++++++---
>  drivers/gpu/drm/i915/intel_breadcrumbs.c | 17 ++++++----------
>  drivers/gpu/drm/i915/intel_ringbuffer.h  | 33 +++++++++++-------------------
>  6 files changed, 59 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index eed9ead1b592..07a69e1ff1c3 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -4065,9 +4065,9 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req)
>  	 * is woken.
>  	 */
>  	if (engine->irq_seqno_barrier &&
> -	    rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh) == current &&
>  	    test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) {
> -		struct task_struct *tsk;
> +		struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +		unsigned long flags;
>
>  		/* The ordering of irq_posted versus applying the barrier
>  		 * is crucial. The clearing of the current irq_posted must
> @@ -4089,17 +4089,16 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req)
>  		 * the seqno before we believe it coherent since they see
>  		 * irq_posted == false but we are still running).
>  		 */
> -		rcu_read_lock();
> -		tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh);
> -		if (tsk && tsk != current)
> +		spin_lock_irqsave(&b->lock, flags);
> +		if (b->first_wait && b->first_wait->tsk != current)
>  			/* Note that if the bottom-half is changed as we
>  			 * are sending the wake-up, the new bottom-half will
>  			 * be woken by whomever made the change. We only have
>  			 * to worry about when we steal the irq-posted for
>  			 * ourself.
>  			 */
> -			wake_up_process(tsk);
> -		rcu_read_unlock();
> +			wake_up_process(b->first_wait->tsk);
> +		spin_unlock_irqrestore(&b->lock, flags);
>
>  		if (__i915_gem_request_completed(req, seqno))
>  			return true;
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index fbfeb5f8d069..77c3ee7a3fd0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -1083,7 +1083,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  	if (flags & I915_WAIT_LOCKED)
>  		add_wait_queue(errq, &reset);
>
> -	intel_wait_init(&wait);
> +	intel_wait_init(&wait, req);
>
>  restart:
>  	do {
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 5f73d8c0a38a..0efee879df23 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -32,10 +32,12 @@
>
>  struct drm_file;
>  struct drm_i915_gem_object;
> +struct drm_i915_gem_request;
>
>  struct intel_wait {
>  	struct rb_node node;
>  	struct task_struct *tsk;
> +	struct drm_i915_gem_request *request;
>  	u32 seqno;

Hm, do we now have a situation where both waiters and signallers hold a 
reference to the request so could drop the seqno field from here?

>  };
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index bc70e2c451b2..3c79753edf0e 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1033,12 +1033,41 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
>
>  static void notify_ring(struct intel_engine_cs *engine)
>  {
> -	bool waiters;
> +	struct drm_i915_gem_request *rq = NULL;
> +	struct intel_wait *wait;
> +
> +	if (!intel_engine_has_waiter(engine))
> +		return;

I think we need the tracepoint before the bail out.

>
>  	atomic_inc(&engine->irq_count);

And the increment as well.

>  	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
> -	waiters = intel_engine_wakeup(engine);
> -	trace_intel_engine_notify(engine, waiters);
> +
> +	spin_lock(&engine->breadcrumbs.lock);
> +	wait = engine->breadcrumbs.first_wait;
> +	if (wait) {
> +		/* We use a callback from the dma-fence to submit
> +		 * requests after waiting on our own requests. To
> +		 * ensure minimum delay in queuing the next request to
> +		 * hardware, signal the fence now rather than wait for
> +		 * the signaler to be woken up. We still wake up the
> +		 * waiter in order to handle the irq-seqno coherency
> +		 * issues (we may receive the interrupt before the
> +		 * seqno is written, see __i915_request_irq_complete())
> +		 * and to handle coalescing of multiple seqno updates
> +		 * and many waiters.
> +		 */
> +		if (i915_seqno_passed(intel_engine_get_seqno(engine),
> +				      wait->seqno))
> +			rq = wait->request;
> +
> +		wake_up_process(wait->tsk);
> +	}
> +	spin_unlock(&engine->breadcrumbs.lock);
> +
> +	if (rq) /* rq is protected by RCU, so safe from hard-irq context */
> +		dma_fence_signal(&rq->fence);
> +
> +	trace_intel_engine_notify(engine, wait);
>  }
>
>  static void vlv_c0_read(struct drm_i915_private *dev_priv,
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 027c93e34c97..5f2665aa817c 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -81,8 +81,11 @@ static void intel_breadcrumbs_fake_irq(unsigned long data)
>  	 * every jiffie in order to kick the oldest waiter to do the
>  	 * coherent seqno check.
>  	 */
> -	if (intel_engine_wakeup(engine))
> -		mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
> +	if (!intel_engine_has_waiter(engine))
> +		return;
> +
> +	intel_engine_wakeup(engine);
> +	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);

Not sure it is worth splitting this in two instead of just leaving it as 
it was.

>  }
>
>  static void irq_enable(struct intel_engine_cs *engine)
> @@ -275,7 +278,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
>  	}
>  	rb_link_node(&wait->node, parent, p);
>  	rb_insert_color(&wait->node, &b->waiters);
> -	GEM_BUG_ON(!first && !rcu_access_pointer(b->irq_seqno_bh));
>
>  	if (completed) {
>  		struct rb_node *next = rb_next(completed);
> @@ -284,7 +286,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
>  		if (next && next != &wait->node) {
>  			GEM_BUG_ON(first);
>  			b->first_wait = to_wait(next);
> -			rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
>  			/* As there is a delay between reading the current
>  			 * seqno, processing the completed tasks and selecting
>  			 * the next waiter, we may have missed the interrupt
> @@ -311,7 +312,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
>  	if (first) {
>  		GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
>  		b->first_wait = wait;
> -		rcu_assign_pointer(b->irq_seqno_bh, wait->tsk);
>  		/* After assigning ourselves as the new bottom-half, we must
>  		 * perform a cursory check to prevent a missed interrupt.
>  		 * Either we miss the interrupt whilst programming the hardware,
> @@ -322,7 +322,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
>  		 */
>  		__intel_breadcrumbs_enable_irq(b);
>  	}
> -	GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh));
>  	GEM_BUG_ON(!b->first_wait);
>  	GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);
>
> @@ -370,8 +369,6 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
>  		const int priority = wakeup_priority(b, wait->tsk);
>  		struct rb_node *next;
>
> -		GEM_BUG_ON(rcu_access_pointer(b->irq_seqno_bh) != wait->tsk);
> -
>  		/* We are the current bottom-half. Find the next candidate,
>  		 * the first waiter in the queue on the remaining oldest
>  		 * request. As multiple seqnos may complete in the time it
> @@ -413,13 +410,11 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
>  			 * exception rather than a seqno completion.
>  			 */
>  			b->first_wait = to_wait(next);
> -			rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
>  			if (b->first_wait->seqno != wait->seqno)
>  				__intel_breadcrumbs_enable_irq(b);
>  			wake_up_process(b->first_wait->tsk);
>  		} else {
>  			b->first_wait = NULL;
> -			rcu_assign_pointer(b->irq_seqno_bh, NULL);
>  			__intel_breadcrumbs_disable_irq(b);
>  		}
>  	} else {
> @@ -433,7 +428,6 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
>  	GEM_BUG_ON(b->first_wait == wait);
>  	GEM_BUG_ON(rb_first(&b->waiters) !=
>  		   (b->first_wait ? &b->first_wait->node : NULL));
> -	GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters));
>  }
>
>  void intel_engine_remove_wait(struct intel_engine_cs *engine,
> @@ -595,6 +589,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
>  		return;
>
>  	request->signaling.wait.tsk = b->signaler;
> +	request->signaling.wait.request = request;
>  	request->signaling.wait.seqno = seqno;
>  	i915_gem_request_get(request);
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 0f29e07a9581..c90b60f47f25 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -235,8 +235,6 @@ struct intel_engine_cs {
>  	 * the overhead of waking that client is much preferred.
>  	 */
>  	struct intel_breadcrumbs {
> -		struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */
> -
>  		spinlock_t lock; /* protects the lists of requests; irqsafe */
>  		struct rb_root waiters; /* sorted by retirement, priority */
>  		struct rb_root signals; /* sorted by retirement */
> @@ -582,9 +580,11 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
>  /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
>  int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
>
> -static inline void intel_wait_init(struct intel_wait *wait)
> +static inline void intel_wait_init(struct intel_wait *wait,
> +				   struct drm_i915_gem_request *rq)
>  {
>  	wait->tsk = current;
> +	wait->request = rq;
>  }
>
>  static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
> @@ -639,29 +639,20 @@ void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);
>
>  static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
>  {
> -	return rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh);
> +	return READ_ONCE(engine->breadcrumbs.first_wait);
>  }
>
> -static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine)
> +static inline bool intel_engine_wakeup(struct intel_engine_cs *engine)
>  {
> +	struct intel_wait *wait;
> +	unsigned long flags;
>  	bool wakeup = false;
>
> -	/* Note that for this not to dangerously chase a dangling pointer,
> -	 * we must hold the rcu_read_lock here.
> -	 *
> -	 * Also note that tsk is likely to be in !TASK_RUNNING state so an
> -	 * early test for tsk->state != TASK_RUNNING before wake_up_process()
> -	 * is unlikely to be beneficial.
> -	 */
> -	if (intel_engine_has_waiter(engine)) {
> -		struct task_struct *tsk;
> -
> -		rcu_read_lock();
> -		tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh);
> -		if (tsk)
> -			wakeup = wake_up_process(tsk);
> -		rcu_read_unlock();
> -	}
> +	spin_lock_irqsave(&engine->breadcrumbs.lock, flags);
> +	wait = engine->breadcrumbs.first_wait;
> +	if (wait)
> +		wakeup = wake_up_process(wait->tsk);
> +	spin_unlock_irqrestore(&engine->breadcrumbs.lock, flags);
>
>  	return wakeup;
>  }
>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] drm/i915: signal first fence from irq handler if complete
  2017-02-24 10:04 ` [PATCH v2 1/2] " Tvrtko Ursulin
@ 2017-02-24 10:19   ` Chris Wilson
  2017-02-24 10:26     ` Chris Wilson
  2017-02-24 10:47     ` Tvrtko Ursulin
  0 siblings, 2 replies; 10+ messages in thread
From: Chris Wilson @ 2017-02-24 10:19 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Fri, Feb 24, 2017 at 10:04:23AM +0000, Tvrtko Ursulin wrote:
> 
> On 23/02/2017 15:42, Chris Wilson wrote:
> >diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> >index 5f73d8c0a38a..0efee879df23 100644
> >--- a/drivers/gpu/drm/i915/i915_gem_request.h
> >+++ b/drivers/gpu/drm/i915/i915_gem_request.h
> >@@ -32,10 +32,12 @@
> >
> > struct drm_file;
> > struct drm_i915_gem_object;
> >+struct drm_i915_gem_request;
> >
> > struct intel_wait {
> > 	struct rb_node node;
> > 	struct task_struct *tsk;
> >+	struct drm_i915_gem_request *request;
> > 	u32 seqno;
> 
> Hm, do we now have a situation where both waiters and signallers
> hold a reference to the request so could drop the seqno field from
> here?

Keeping the seqno field helps with the lowlevel testcase that works
without requests. Not scientific, but it was one less pointer for the
irq handler.

> > };
> >
> >diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> >index bc70e2c451b2..3c79753edf0e 100644
> >--- a/drivers/gpu/drm/i915/i915_irq.c
> >+++ b/drivers/gpu/drm/i915/i915_irq.c
> >@@ -1033,12 +1033,41 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
> >
> > static void notify_ring(struct intel_engine_cs *engine)
> > {
> >-	bool waiters;
> >+	struct drm_i915_gem_request *rq = NULL;
> >+	struct intel_wait *wait;
> >+
> >+	if (!intel_engine_has_waiter(engine))
> >+		return;
> 
> I think we need the tracepoint before the bail out.
> 
> >
> > 	atomic_inc(&engine->irq_count);
> 
> And the increment as well.
> 
> > 	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
> >-	waiters = intel_engine_wakeup(engine);
> >-	trace_intel_engine_notify(engine, waiters);
> >+
> >+	spin_lock(&engine->breadcrumbs.lock);
> >+	wait = engine->breadcrumbs.first_wait;
> >+	if (wait) {
> >+		/* We use a callback from the dma-fence to submit
> >+		 * requests after waiting on our own requests. To
> >+		 * ensure minimum delay in queuing the next request to
> >+		 * hardware, signal the fence now rather than wait for
> >+		 * the signaler to be woken up. We still wake up the
> >+		 * waiter in order to handle the irq-seqno coherency
> >+		 * issues (we may receive the interrupt before the
> >+		 * seqno is written, see __i915_request_irq_complete())
> >+		 * and to handle coalescing of multiple seqno updates
> >+		 * and many waiters.
> >+		 */
> >+		if (i915_seqno_passed(intel_engine_get_seqno(engine),
> >+				      wait->seqno))
> >+			rq = wait->request;
> >+
> >+		wake_up_process(wait->tsk);
> >+	}
> >+	spin_unlock(&engine->breadcrumbs.lock);
> >+
> >+	if (rq) /* rq is protected by RCU, so safe from hard-irq context */
> >+		dma_fence_signal(&rq->fence);
> >+
> >+	trace_intel_engine_notify(engine, wait);

Hmm, I put the trace here for access to wait. I guess we only need a
READ_ONCE(engine->breadcrumbs.first_wait) for tracking?

Note in the next patch, it's different again. And yeah I should massage
it such that it doesn't introduce a fluctuation.

> > }
> >
> > static void vlv_c0_read(struct drm_i915_private *dev_priv,
> >diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> >index 027c93e34c97..5f2665aa817c 100644
> >--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> >+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> >@@ -81,8 +81,11 @@ static void intel_breadcrumbs_fake_irq(unsigned long data)
> > 	 * every jiffie in order to kick the oldest waiter to do the
> > 	 * coherent seqno check.
> > 	 */
> >-	if (intel_engine_wakeup(engine))
> >-		mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
> >+	if (!intel_engine_has_waiter(engine))
> >+		return;
> >+
> >+	intel_engine_wakeup(engine);
> >+	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
> 
> Not sure it is worth splitting this in two instead of just leaving
> it as it was.

The problem with the previous code was the timer stopped if the waiter
was running at that moment. I am still playing to find something that
doesn't look horrible. Just going back to the bool intel_engine_wakeup,
which is now far too large to be an inline for an unimportant function.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] drm/i915: signal first fence from irq handler if complete
  2017-02-24 10:19   ` Chris Wilson
@ 2017-02-24 10:26     ` Chris Wilson
  2017-02-24 10:47     ` Tvrtko Ursulin
  1 sibling, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2017-02-24 10:26 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

On Fri, Feb 24, 2017 at 10:19:07AM +0000, Chris Wilson wrote:
> On Fri, Feb 24, 2017 at 10:04:23AM +0000, Tvrtko Ursulin wrote:
> > >diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> > >index 027c93e34c97..5f2665aa817c 100644
> > >--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> > >+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> > >@@ -81,8 +81,11 @@ static void intel_breadcrumbs_fake_irq(unsigned long data)
> > > 	 * every jiffie in order to kick the oldest waiter to do the
> > > 	 * coherent seqno check.
> > > 	 */
> > >-	if (intel_engine_wakeup(engine))
> > >-		mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
> > >+	if (!intel_engine_has_waiter(engine))
> > >+		return;
> > >+
> > >+	intel_engine_wakeup(engine);
> > >+	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
> > 
> > Not sure it is worth splitting this in two instead of just leaving
> > it as it was.
> 
> The problem with the previous code was the timer stopped if the waiter
> was running at that moment. I am still playing to find something that
> doesn't look horrible. Just going back to the bool intel_engine_wakeup,
> which is now far too large to be an inline for an unimportant function.

Ah, found the complication. Here we want intel_engine_wakeup() to report
if there was a waiter, but in intel_breadcrumbs_hangcheck, we want to
know if we failed to wakeup the waiter.

Just grin and bear it for the moment. :|
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 2/2] drm/i915: Delay disabling the user interrupt for breadcrumbs
  2017-02-23 15:42 ` [PATCH v2 2/2] drm/i915: Delay disabling the user interrupt for breadcrumbs Chris Wilson
@ 2017-02-24 10:34   ` Tvrtko Ursulin
  2017-02-24 10:47     ` Chris Wilson
  0 siblings, 1 reply; 10+ messages in thread
From: Tvrtko Ursulin @ 2017-02-24 10:34 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 23/02/2017 15:42, Chris Wilson wrote:
> A significant cost in setting up a wait is the overhead of enabling the
> interrupt. As we disable the interrupt whenever the queue of waiters is
> empty, if we are frequently waiting on alternating batches, we end up
> re-enabling the interrupt on a frequent basis. We do want to disable the
> interrupt during normal operations as under high load it may add several
> thousand interrupts/s - we have been known in the past to occupy whole
> cores with our interrupt handler after accidentally leaving user
> interrupts enabled. As a compromise, leave the interrupt enabled until
> the next IRQ, or the system is idle. This gives a small window for a
> waiter to keep the interrupt active and not be delayed by having to
> re-enable the interrupt.
>
> v2: Restore hangcheck/missed-irq detection for continuations
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c          |  4 +-
>  drivers/gpu/drm/i915/i915_irq.c          |  5 +-
>  drivers/gpu/drm/i915/intel_breadcrumbs.c | 95 +++++++++++++++++++-------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h  |  4 +-
>  4 files changed, 65 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a8167003c10b..3ec8c948fe45 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2988,8 +2988,10 @@ i915_gem_idle_work_handler(struct work_struct *work)
>  	if (wait_for(intel_execlists_idle(dev_priv), 10))
>  		DRM_ERROR("Timeout waiting for engines to idle\n");
>
> -	for_each_engine(engine, dev_priv, id)
> +	for_each_engine(engine, dev_priv, id) {
> +		intel_engine_disarm_breadcrumbs(engine);
>  		i915_gem_batch_pool_fini(&engine->batch_pool);
> +	}
>
>  	GEM_BUG_ON(!dev_priv->gt.awake);
>  	dev_priv->gt.awake = false;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 3c79753edf0e..ba0bb33e12ed 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1036,9 +1036,6 @@ static void notify_ring(struct intel_engine_cs *engine)
>  	struct drm_i915_gem_request *rq = NULL;
>  	struct intel_wait *wait;
>
> -	if (!intel_engine_has_waiter(engine))
> -		return;
> -
>  	atomic_inc(&engine->irq_count);
>  	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
>
> @@ -1061,6 +1058,8 @@ static void notify_ring(struct intel_engine_cs *engine)
>  			rq = wait->request;
>
>  		wake_up_process(wait->tsk);
> +	} else {
> +		__intel_engine_disarm_breadcrumbs(engine);
>  	}
>  	spin_unlock(&engine->breadcrumbs.lock);
>
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 5f2665aa817c..bf14022f6adb 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -35,8 +35,10 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
>  {
>  	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
>  	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +	struct intel_wait *wait;
> +	unsigned long flags;
>
> -	if (!b->irq_enabled)
> +	if (!b->irq_armed)
>  		return;
>
>  	if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) {
> @@ -49,10 +51,15 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
>  	 * to process the pending interrupt (e.g, low priority task on a loaded
>  	 * system) and wait until it sleeps before declaring a missed interrupt.
>  	 */
> -	if (!intel_engine_wakeup(engine)) {
> +	spin_lock_irqsave(&b->lock, flags);
> +	wait = b->first_wait;
> +	if (!wait || !wake_up_process(wait->tsk)) {
>  		mod_timer(&b->hangcheck, wait_timeout());
> -		return;
> +		wait = NULL;
>  	}
> +	spin_unlock_irqrestore(&b->lock, flags);
> +	if (!wait)
> +		return;

I don't see a difference versus the old code:

if (!intel_engine_wakeup(engine)) {
	mod_timer(...);
	return;
}

There is only one instance of the timer at a time so we don't need to 
extend the lock to mod_timer. Or you were thinking 
intel_engine_reset_breadcrumbs? But there we do del_timer_sync before 
proceeding to touch the timer.

>
>  	DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name);
>  	set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
> @@ -110,6 +117,34 @@ static void irq_disable(struct intel_engine_cs *engine)
>  	spin_unlock(&engine->i915->irq_lock);
>  }
>
> +void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
> +{
> +	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +
> +	assert_spin_locked(&b->lock);
> +
> +	if (b->irq_enabled) {
> +		irq_disable(engine);
> +		b->irq_enabled = false;
> +	}
> +
> +	b->irq_armed = false;
> +}
> +
> +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
> +{
> +	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +	unsigned long flags;
> +
> +	if (!b->irq_armed)
> +		return;

Sounds safer to put this check under the spinlock so reader can think 
less. :) Or in fact do we even need the check? It's only the idle work 
handler so could just have the below block I think.

> +
> +	spin_lock_irqsave(&b->lock, flags);
> +	if (!intel_engine_has_waiter(engine))
> +		__intel_engine_disarm_breadcrumbs(engine);
> +	spin_unlock_irqrestore(&b->lock, flags);
> +}
> +
>  static bool use_fake_irq(const struct intel_breadcrumbs *b)
>  {
>  	const struct intel_engine_cs *engine =
> @@ -134,9 +169,17 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
>  	struct drm_i915_private *i915 = engine->i915;
>
>  	assert_spin_locked(&b->lock);
> -	if (b->rpm_wakelock)
> +	if (b->irq_armed)
>  		return;
>
> +	/* The breadcrumb irq will be disarmed on the interrupt after the
> +	 * waiters are signaled. This gives us a single interrupt window in
> +	 * which we can add a new waiter and avoid the cost of re-enabling
> +	 * the irq.
> +	 */
> +	b->irq_armed = true;
> +	GEM_BUG_ON(b->irq_enabled);
> +
>  	if (I915_SELFTEST_ONLY(b->mock)) {
>  		/* For our mock objects we want to avoid interaction
>  		 * with the real hardware (which is not set up). So
> @@ -145,17 +188,15 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
>  		 * implementation to call intel_engine_wakeup()
>  		 * itself when it wants to simulate a user interrupt,
>  		 */
> -		b->rpm_wakelock = true;
>  		return;
>  	}
>
>  	/* Since we are waiting on a request, the GPU should be busy
> -	 * and should have its own rpm reference. For completeness,
> -	 * record an rpm reference for ourselves to cover the
> -	 * interrupt we unmask.
> +	 * and should have its own rpm reference. This is tracked
> +	 * by i915->gt.awake, we can forgo holding our own wakref
> +	 * for the interrupt as before i915->gt.awake is released (when
> +	 * the driver is idle) we disarm the breadcrumbs.
>  	 */
> -	intel_runtime_pm_get_noresume(i915);
> -	b->rpm_wakelock = true;
>
>  	/* No interrupts? Kick the waiter every jiffie! */
>  	if (intel_irqs_enabled(i915)) {
> @@ -173,29 +214,6 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
>  	}
>  }
>
> -static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
> -{
> -	struct intel_engine_cs *engine =
> -		container_of(b, struct intel_engine_cs, breadcrumbs);
> -
> -	assert_spin_locked(&b->lock);
> -	if (!b->rpm_wakelock)
> -		return;
> -
> -	if (I915_SELFTEST_ONLY(b->mock)) {
> -		b->rpm_wakelock = false;
> -		return;
> -	}
> -
> -	if (b->irq_enabled) {
> -		irq_disable(engine);
> -		b->irq_enabled = false;
> -	}
> -
> -	intel_runtime_pm_put(engine->i915);
> -	b->rpm_wakelock = false;
> -}
> -
>  static inline struct intel_wait *to_wait(struct rb_node *node)
>  {
>  	return rb_entry(node, struct intel_wait, node);
> @@ -415,7 +433,6 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
>  			wake_up_process(b->first_wait->tsk);
>  		} else {
>  			b->first_wait = NULL;
> -			__intel_breadcrumbs_disable_irq(b);
>  		}
>  	} else {
>  		GEM_BUG_ON(rb_first(&b->waiters) == &wait->node);
> @@ -707,14 +724,16 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
>  	cancel_fake_irq(engine);
>  	spin_lock_irq(&b->lock);
>
> -	__intel_breadcrumbs_disable_irq(b);
> +	if (b->irq_enabled)
> +		irq_enable(engine);
> +	else
> +		irq_disable(engine);
> +
>  	if (intel_engine_has_waiter(engine)) {
> -		__intel_breadcrumbs_enable_irq(b);
>  		if (test_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted))
>  			wake_up_process(b->first_wait->tsk);
> -	} else {
> +		mod_timer(&b->hangcheck, wait_timeout());
>  		/* sanitize the IMR and unmask any auxiliary interrupts */
> -		irq_disable(engine);
>  	}
>
>  	spin_unlock_irq(&b->lock);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index c90b60f47f25..9f89160c7de9 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -246,8 +246,8 @@ struct intel_engine_cs {
>
>  		unsigned int hangcheck_interrupts;
>
> +		bool irq_armed : 1;
>  		bool irq_enabled : 1;
> -		bool rpm_wakelock : 1;
>  		I915_SELFTEST_DECLARE(bool mock : 1);
>  	} breadcrumbs;
>
> @@ -657,6 +657,8 @@ static inline bool intel_engine_wakeup(struct intel_engine_cs *engine)
>  	return wakeup;
>  }
>
> +void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
> +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
>  void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
>  void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
>  bool intel_breadcrumbs_busy(struct intel_engine_cs *engine);
>

The rest looks OK, but I wonder if I missed something since the CI 
wasn't happy with it?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 2/2] drm/i915: Delay disabling the user interrupt for breadcrumbs
  2017-02-24 10:34   ` Tvrtko Ursulin
@ 2017-02-24 10:47     ` Chris Wilson
  0 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2017-02-24 10:47 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Fri, Feb 24, 2017 at 10:34:07AM +0000, Tvrtko Ursulin wrote:
> 
> On 23/02/2017 15:42, Chris Wilson wrote:
> >A significant cost in setting up a wait is the overhead of enabling the
> >interrupt. As we disable the interrupt whenever the queue of waiters is
> >empty, if we are frequently waiting on alternating batches, we end up
> >re-enabling the interrupt on a frequent basis. We do want to disable the
> >interrupt during normal operations as under high load it may add several
> >thousand interrupts/s - we have been known in the past to occupy whole
> >cores with our interrupt handler after accidentally leaving user
> >interrupts enabled. As a compromise, leave the interrupt enabled until
> >the next IRQ, or the system is idle. This gives a small window for a
> >waiter to keep the interrupt active and not be delayed by having to
> >re-enable the interrupt.
> >
> >v2: Restore hangcheck/missed-irq detection for continuations
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> >---
> > drivers/gpu/drm/i915/i915_gem.c          |  4 +-
> > drivers/gpu/drm/i915/i915_irq.c          |  5 +-
> > drivers/gpu/drm/i915/intel_breadcrumbs.c | 95 +++++++++++++++++++-------------
> > drivers/gpu/drm/i915/intel_ringbuffer.h  |  4 +-
> > 4 files changed, 65 insertions(+), 43 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >index a8167003c10b..3ec8c948fe45 100644
> >--- a/drivers/gpu/drm/i915/i915_gem.c
> >+++ b/drivers/gpu/drm/i915/i915_gem.c
> >@@ -2988,8 +2988,10 @@ i915_gem_idle_work_handler(struct work_struct *work)
> > 	if (wait_for(intel_execlists_idle(dev_priv), 10))
> > 		DRM_ERROR("Timeout waiting for engines to idle\n");
> >
> >-	for_each_engine(engine, dev_priv, id)
> >+	for_each_engine(engine, dev_priv, id) {
> >+		intel_engine_disarm_breadcrumbs(engine);
> > 		i915_gem_batch_pool_fini(&engine->batch_pool);
> >+	}
> >
> > 	GEM_BUG_ON(!dev_priv->gt.awake);
> > 	dev_priv->gt.awake = false;
> >diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> >index 3c79753edf0e..ba0bb33e12ed 100644
> >--- a/drivers/gpu/drm/i915/i915_irq.c
> >+++ b/drivers/gpu/drm/i915/i915_irq.c
> >@@ -1036,9 +1036,6 @@ static void notify_ring(struct intel_engine_cs *engine)
> > 	struct drm_i915_gem_request *rq = NULL;
> > 	struct intel_wait *wait;
> >
> >-	if (!intel_engine_has_waiter(engine))
> >-		return;
> >-
> > 	atomic_inc(&engine->irq_count);
> > 	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
> >
> >@@ -1061,6 +1058,8 @@ static void notify_ring(struct intel_engine_cs *engine)
> > 			rq = wait->request;
> >
> > 		wake_up_process(wait->tsk);
> >+	} else {
> >+		__intel_engine_disarm_breadcrumbs(engine);
> > 	}
> > 	spin_unlock(&engine->breadcrumbs.lock);
> >
> >diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> >index 5f2665aa817c..bf14022f6adb 100644
> >--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> >+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> >@@ -35,8 +35,10 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
> > {
> > 	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
> > 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> >+	struct intel_wait *wait;
> >+	unsigned long flags;
> >
> >-	if (!b->irq_enabled)
> >+	if (!b->irq_armed)
> > 		return;
> >
> > 	if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) {
> >@@ -49,10 +51,15 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
> > 	 * to process the pending interrupt (e.g, low priority task on a loaded
> > 	 * system) and wait until it sleeps before declaring a missed interrupt.
> > 	 */
> >-	if (!intel_engine_wakeup(engine)) {
> >+	spin_lock_irqsave(&b->lock, flags);
> >+	wait = b->first_wait;
> >+	if (!wait || !wake_up_process(wait->tsk)) {
> > 		mod_timer(&b->hangcheck, wait_timeout());
> >-		return;
> >+		wait = NULL;
> > 	}
> >+	spin_unlock_irqrestore(&b->lock, flags);
> >+	if (!wait)
> >+		return;
> 
> I don't see a difference versus the old code:
> 
> if (!intel_engine_wakeup(engine)) {
> 	mod_timer(...);
> 	return;
> }

It used to be different and then morphed back to that! However, I do
need two different types of intel_engine_wakeup. One that reports a
successful wakeup of a sleeper and the other that reports having any
waiter.
 
> There is only one instance of the timer at a time so we don't need
> to extend the lock to mod_timer. Or you were thinking
> intel_engine_reset_breadcrumbs? But there we do del_timer_sync
> before proceeding to touch the timer.

Right, we only need the lock so far as getting wait->tsk (we could
switch to rcu at that point). All I'm looking for here is tidy code.

> > 	DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name);
> > 	set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
> >@@ -110,6 +117,34 @@ static void irq_disable(struct intel_engine_cs *engine)
> > 	spin_unlock(&engine->i915->irq_lock);
> > }
> >
> >+void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
> >+{
> >+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> >+
> >+	assert_spin_locked(&b->lock);
> >+
> >+	if (b->irq_enabled) {
> >+		irq_disable(engine);
> >+		b->irq_enabled = false;
> >+	}
> >+
> >+	b->irq_armed = false;
> >+}
> >+
> >+void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
> >+{
> >+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> >+	unsigned long flags;
> >+
> >+	if (!b->irq_armed)
> >+		return;
> 
> Sounds safer to put this check under the spinlock so reader can
> think less. :) Or in fact do we even need the check? It's only the
> idle work handler so could just have the below block I think.

Killed it entirely! Rearranged the code a bit to fixup restarting after
a reset, and it evaporated.

> The rest looks OK, but I wonder if I missed something since the CI
> wasn't happy with it?

Yeah. I didn't get restarting after a reset correct. That seems to have
been the first problem that tripped CI over...
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] drm/i915: signal first fence from irq handler if complete
  2017-02-24 10:19   ` Chris Wilson
  2017-02-24 10:26     ` Chris Wilson
@ 2017-02-24 10:47     ` Tvrtko Ursulin
  2017-02-24 11:01       ` Chris Wilson
  1 sibling, 1 reply; 10+ messages in thread
From: Tvrtko Ursulin @ 2017-02-24 10:47 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 24/02/2017 10:19, Chris Wilson wrote:
> On Fri, Feb 24, 2017 at 10:04:23AM +0000, Tvrtko Ursulin wrote:
>>
>> On 23/02/2017 15:42, Chris Wilson wrote:
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
>>> index 5f73d8c0a38a..0efee879df23 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_request.h
>>> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
>>> @@ -32,10 +32,12 @@
>>>
>>> struct drm_file;
>>> struct drm_i915_gem_object;
>>> +struct drm_i915_gem_request;
>>>
>>> struct intel_wait {
>>> 	struct rb_node node;
>>> 	struct task_struct *tsk;
>>> +	struct drm_i915_gem_request *request;
>>> 	u32 seqno;
>>
>> Hm, do we now have a situation where both waiters and signallers
>> hold a reference to the request so could drop the seqno field from
>> here?
>
> Keeping the seqno field helps with the lowlevel testcase that works
> without requests. Not scientific, but it was one less pointer for the
> irq handler.

Ok.

>>> };
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
>>> index bc70e2c451b2..3c79753edf0e 100644
>>> --- a/drivers/gpu/drm/i915/i915_irq.c
>>> +++ b/drivers/gpu/drm/i915/i915_irq.c
>>> @@ -1033,12 +1033,41 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
>>>
>>> static void notify_ring(struct intel_engine_cs *engine)
>>> {
>>> -	bool waiters;
>>> +	struct drm_i915_gem_request *rq = NULL;
>>> +	struct intel_wait *wait;
>>> +
>>> +	if (!intel_engine_has_waiter(engine))
>>> +		return;
>>
>> I think we need the tracepoint before the bail out.
>>
>>>
>>> 	atomic_inc(&engine->irq_count);
>>
>> And the increment as well.
>>
>>> 	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
>>> -	waiters = intel_engine_wakeup(engine);
>>> -	trace_intel_engine_notify(engine, waiters);
>>> +
>>> +	spin_lock(&engine->breadcrumbs.lock);
>>> +	wait = engine->breadcrumbs.first_wait;
>>> +	if (wait) {
>>> +		/* We use a callback from the dma-fence to submit
>>> +		 * requests after waiting on our own requests. To
>>> +		 * ensure minimum delay in queuing the next request to
>>> +		 * hardware, signal the fence now rather than wait for
>>> +		 * the signaler to be woken up. We still wake up the
>>> +		 * waiter in order to handle the irq-seqno coherency
>>> +		 * issues (we may receive the interrupt before the
>>> +		 * seqno is written, see __i915_request_irq_complete())
>>> +		 * and to handle coalescing of multiple seqno updates
>>> +		 * and many waiters.
>>> +		 */
>>> +		if (i915_seqno_passed(intel_engine_get_seqno(engine),
>>> +				      wait->seqno))
>>> +			rq = wait->request;
>>> +
>>> +		wake_up_process(wait->tsk);
>>> +	}
>>> +	spin_unlock(&engine->breadcrumbs.lock);
>>> +
>>> +	if (rq) /* rq is protected by RCU, so safe from hard-irq context */
>>> +		dma_fence_signal(&rq->fence);
>>> +
>>> +	trace_intel_engine_notify(engine, wait);
>
> Hmm, I put the trace here for access to wait. I guess we only need a
> READ_ONCE(engine->breadcrumbs.first_wait) for tracking?

So far I don't think I even used that field from the tracepoint, but I 
think we just agreed it was an useful thing to have.

Tracepoint itself is useful for deducing individual request boundaries 
with coalescing which is why I don't think it being conditional on 
waiters is correct. But I do agree that might be questionable since it 
is not reliable until the GuC scheduling backend and GuC becoming a 
default, and at that point it will make no difference where the exact 
tracepoint site is.

> Note in the next patch, it's different again. And yeah I should massage
> it such that it doesn't introduce a fluctuation.

Yes would be better. :)

>>> }
>>>
>>> static void vlv_c0_read(struct drm_i915_private *dev_priv,
>>> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
>>> index 027c93e34c97..5f2665aa817c 100644
>>> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
>>> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
>>> @@ -81,8 +81,11 @@ static void intel_breadcrumbs_fake_irq(unsigned long data)
>>> 	 * every jiffie in order to kick the oldest waiter to do the
>>> 	 * coherent seqno check.
>>> 	 */
>>> -	if (intel_engine_wakeup(engine))
>>> -		mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
>>> +	if (!intel_engine_has_waiter(engine))
>>> +		return;
>>> +
>>> +	intel_engine_wakeup(engine);
>>> +	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
>>
>> Not sure it is worth splitting this in two instead of just leaving
>> it as it was.
>
> The problem with the previous code was the timer stopped if the waiter
> was running at that moment. I am still playing to find something that
> doesn't look horrible. Just going back to the bool intel_engine_wakeup,
> which is now far too large to be an inline for an unimportant function.

[thread merge]

> Ah, found the complication. Here we want intel_engine_wakeup() to
> report if there was a waiter, but in intel_breadcrumbs_hangcheck, we
> want to know if we failed to wakeup the waiter.
>
> Just grin and bear it for the moment. :|

I've missed that detail. Makes sense.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] drm/i915: signal first fence from irq handler if complete
  2017-02-24 10:47     ` Tvrtko Ursulin
@ 2017-02-24 11:01       ` Chris Wilson
  0 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2017-02-24 11:01 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Fri, Feb 24, 2017 at 10:47:29AM +0000, Tvrtko Ursulin wrote:
> 
> On 24/02/2017 10:19, Chris Wilson wrote:
> >Ah, found the complication. Here we want intel_engine_wakeup() to
> >report if there was a waiter, but in intel_breadcrumbs_hangcheck, we
> >want to know if we failed to wakeup the waiter.
> >
> >Just grin and bear it for the moment. :|
> 
> I've missed that detail. Makes sense.

Since I ended up opencoding it in the next patch, I think I have a
better idea now to return flags showing waiter | sleeper.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2017-02-24 11:01 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-02-23 15:42 [PATCH v2 1/2] drm/i915: signal first fence from irq handler if complete Chris Wilson
2017-02-23 15:42 ` [PATCH v2 2/2] drm/i915: Delay disabling the user interrupt for breadcrumbs Chris Wilson
2017-02-24 10:34   ` Tvrtko Ursulin
2017-02-24 10:47     ` Chris Wilson
2017-02-23 17:23 ` ✗ Fi.CI.BAT: failure for series starting with [v2,1/2] drm/i915: signal first fence from irq handler if complete Patchwork
2017-02-24 10:04 ` [PATCH v2 1/2] " Tvrtko Ursulin
2017-02-24 10:19   ` Chris Wilson
2017-02-24 10:26     ` Chris Wilson
2017-02-24 10:47     ` Tvrtko Ursulin
2017-02-24 11:01       ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.