public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 11/25] drm/i915: Spin after waking up for an interrupt
Date: Mon, 27 Jun 2016 11:32:10 +0100	[thread overview]
Message-ID: <577100AA.1020204@linux.intel.com> (raw)
In-Reply-To: <1466849588-17558-12-git-send-email-chris@chris-wilson.co.uk>


On 25/06/16 11:12, Chris Wilson wrote:
> When waiting for an interrupt (waiting for the GPU to complete some
> work), we know we are the single waiter for the GPU. We also know when

You mean we know we were the first waiter, if we have been woken up? Or 
perhaps the single waiter for this request? (more or less, maybe close 
enough) Or also s/", we know we are/, and we know we are" ?

Regards,

Tvrtko

> the GPU has nearly completed our request (or at least started processing
> it), so after being woken and we detect that the GPU is actively working
> on our request, allow the bottom-half (the first waiter who wakes up to
> handle checking the seqno after the interrupt) to spin for a very short
> while to reduce client latencies.
>
> The impact is minimal, there was an improvement to the realtime-vs-many
> clients case, but exporting the function proves useful later.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c  |  2 +-
>   drivers/gpu/drm/i915/i915_drv.h      | 26 +++++++++++++++--------
>   drivers/gpu/drm/i915/i915_gem.c      | 40 +++++++++++++++++++++---------------
>   drivers/gpu/drm/i915/intel_display.c |  2 +-
>   drivers/gpu/drm/i915/intel_pm.c      |  4 ++--
>   5 files changed, 45 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index c81c27dca74d..dba298c7a8b8 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -663,7 +663,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
>   					   i915_gem_request_get_seqno(work->flip_queued_req),
>   					   dev_priv->next_seqno,
>   					   engine->get_seqno(engine),
> -					   i915_gem_request_completed(work->flip_queued_req, true));
> +					   i915_gem_request_completed(work->flip_queued_req));
>   			} else
>   				seq_printf(m, "Flip not associated with any ring\n");
>   			seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n",
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index ed92c434f4e6..4cc372df3534 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3277,24 +3277,27 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
>   	return (int32_t)(seq1 - seq2) >= 0;
>   }
>
> -static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
> -					   bool lazy_coherency)
> +static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req)
>   {
> -	if (!lazy_coherency && req->engine->irq_seqno_barrier)
> -		req->engine->irq_seqno_barrier(req->engine);
>   	return i915_seqno_passed(req->engine->get_seqno(req->engine),
>   				 req->previous_seqno);
>   }
>
> -static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
> -					      bool lazy_coherency)
> +static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req)
>   {
> -	if (!lazy_coherency && req->engine->irq_seqno_barrier)
> -		req->engine->irq_seqno_barrier(req->engine);
>   	return i915_seqno_passed(req->engine->get_seqno(req->engine),
>   				 req->seqno);
>   }
>
> +bool __i915_spin_request(const struct drm_i915_gem_request *request,
> +			 int state, unsigned long timeout_us);
> +static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
> +				     int state, unsigned long timeout_us)
> +{
> +	return (i915_gem_request_started(request) &&
> +		__i915_spin_request(request, state, timeout_us));
> +}
> +
>   int __must_check i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno);
>   int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
>
> @@ -3972,6 +3975,8 @@ static inline void i915_trace_irq_get(struct intel_engine_cs *engine,
>
>   static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
>   {
> +	struct intel_engine_cs *engine = req->engine;
> +
>   	/* Ensure our read of the seqno is coherent so that we
>   	 * do not "miss an interrupt" (i.e. if this is the last
>   	 * request and the seqno write from the GPU is not visible
> @@ -3983,7 +3988,10 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
>   	 * but it is easier and safer to do it every time the waiter
>   	 * is woken.
>   	 */
> -	if (i915_gem_request_completed(req, false))
> +	if (engine->irq_seqno_barrier)
> +		engine->irq_seqno_barrier(engine);
> +
> +	if (i915_gem_request_completed(req))
>   		return true;
>
>   	/* We need to check whether any gpu reset happened in between
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index fe5ba4aa336c..30bd0d0d917b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1375,9 +1375,9 @@ static bool busywait_stop(unsigned long timeout, unsigned cpu)
>   	return this_cpu != cpu;
>   }
>
> -static bool __i915_spin_request(struct drm_i915_gem_request *req, int state)
> +bool __i915_spin_request(const struct drm_i915_gem_request *req,
> +			 int state, unsigned long timeout_us)
>   {
> -	unsigned long timeout;
>   	unsigned cpu;
>
>   	/* When waiting for high frequency requests, e.g. during synchronous
> @@ -1390,19 +1390,15 @@ static bool __i915_spin_request(struct drm_i915_gem_request *req, int state)
>   	 * takes to sleep on a request, on the order of a microsecond.
>   	 */
>
> -	/* Only spin if we know the GPU is processing this request */
> -	if (!i915_gem_request_started(req, true))
> -		return false;
> -
> -	timeout = local_clock_us(&cpu) + 5;
> +	timeout_us += local_clock_us(&cpu);
>   	do {
> -		if (i915_gem_request_completed(req, true))
> +		if (i915_gem_request_completed(req))
>   			return true;
>
>   		if (signal_pending_state(state, current))
>   			break;
>
> -		if (busywait_stop(timeout, cpu))
> +		if (busywait_stop(timeout_us, cpu))
>   			break;
>
>   		cpu_relax_lowlatency();
> @@ -1445,7 +1441,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
>   	if (list_empty(&req->list))
>   		return 0;
>
> -	if (i915_gem_request_completed(req, true))
> +	if (i915_gem_request_completed(req))
>   		return 0;
>
>   	timeout_remain = MAX_SCHEDULE_TIMEOUT;
> @@ -1470,7 +1466,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
>   		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
>
>   	/* Optimistic spin for the next ~jiffie before touching IRQs */
> -	if (__i915_spin_request(req, state))
> +	if (i915_spin_request(req, state, 5))
>   		goto complete;
>
>   	set_current_state(state);
> @@ -1512,6 +1508,10 @@ wakeup:
>   		 */
>   		if (__i915_request_irq_complete(req))
>   			break;
> +
> +		/* Only spin if we know the GPU is processing this request */
> +		if (i915_spin_request(req, state, 2))
> +			break;
>   	}
>   	remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
>
> @@ -3050,8 +3050,16 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
>   {
>   	struct drm_i915_gem_request *request;
>
> +	/* We are called by the error capture and reset at a random
> +	 * point in time. In particular, note that neither is crucially
> +	 * ordered with an interrupt. After a hang, the GPU is dead and we
> +	 * assume that no more writes can happen (we waited long enough for
> +	 * all writes that were in transaction to be flushed) - adding an
> +	 * extra delay for a recent interrupt is pointless. Hence, we do
> +	 * not need an engine->irq_seqno_barrier() before the seqno reads.
> +	 */
>   	list_for_each_entry(request, &engine->request_list, list) {
> -		if (i915_gem_request_completed(request, false))
> +		if (i915_gem_request_completed(request))
>   			continue;
>
>   		return request;
> @@ -3183,7 +3191,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
>   					   struct drm_i915_gem_request,
>   					   list);
>
> -		if (!i915_gem_request_completed(request, true))
> +		if (!i915_gem_request_completed(request))
>   			break;
>
>   		i915_gem_request_retire(request);
> @@ -3207,7 +3215,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
>   	}
>
>   	if (unlikely(engine->trace_irq_req &&
> -		     i915_gem_request_completed(engine->trace_irq_req, true))) {
> +		     i915_gem_request_completed(engine->trace_irq_req))) {
>   		engine->irq_put(engine);
>   		i915_gem_request_assign(&engine->trace_irq_req, NULL);
>   	}
> @@ -3305,7 +3313,7 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
>   		if (req == NULL)
>   			continue;
>
> -		if (i915_gem_request_completed(req, true))
> +		if (i915_gem_request_completed(req))
>   			i915_gem_object_retire__read(obj, i);
>   	}
>
> @@ -3413,7 +3421,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
>   	if (to == from)
>   		return 0;
>
> -	if (i915_gem_request_completed(from_req, true))
> +	if (i915_gem_request_completed(from_req))
>   		return 0;
>
>   	if (!i915_semaphore_is_enabled(to_i915(obj->base.dev))) {
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 699da067bb85..adeafbc96760 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -11594,7 +11594,7 @@ static bool __pageflip_stall_check_cs(struct drm_i915_private *dev_priv,
>   	vblank = intel_crtc_get_vblank_counter(intel_crtc);
>   	if (work->flip_ready_vblank == 0) {
>   		if (work->flip_queued_req &&
> -		    !i915_gem_request_completed(work->flip_queued_req, true))
> +		    !i915_gem_request_completed(work->flip_queued_req))
>   			return false;
>
>   		work->flip_ready_vblank = vblank;
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 4f612b96224e..2f54fc5d1131 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -7739,7 +7739,7 @@ static void __intel_rps_boost_work(struct work_struct *work)
>   	struct request_boost *boost = container_of(work, struct request_boost, work);
>   	struct drm_i915_gem_request *req = boost->req;
>
> -	if (!i915_gem_request_completed(req, true))
> +	if (!i915_gem_request_completed(req))
>   		gen6_rps_boost(req->i915, NULL, req->emitted_jiffies);
>
>   	i915_gem_request_unreference(req);
> @@ -7753,7 +7753,7 @@ void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req)
>   	if (req == NULL || INTEL_GEN(req->i915) < 6)
>   		return;
>
> -	if (i915_gem_request_completed(req, true))
> +	if (i915_gem_request_completed(req))
>   		return;
>
>   	boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2016-06-27 10:32 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-25 10:12 A trail of breadcrumbs Chris Wilson
2016-06-25 10:12 ` [PATCH 01/25] drm/i915: Preserve current RPS frequency across init Chris Wilson
2016-06-25 10:12 ` [PATCH 02/25] drm/i915: Remove superfluous powersave work flushing Chris Wilson
2016-06-25 10:12 ` [PATCH 03/25] drm/i915: Defer enabling rc6 til after we submit the first batch/context Chris Wilson
2016-06-25 10:12 ` [PATCH 04/25] drm: Restore double clflush on the last partial cacheline Chris Wilson
2016-06-25 10:12 ` [PATCH 05/25] drm/i915/shrinker: Flush active on objects before counting Chris Wilson
2016-06-25 10:12 ` [PATCH 06/25] drm/i915: Delay queuing hangcheck to wait-request Chris Wilson
2016-06-25 10:12 ` [PATCH 07/25] drm/i915: Remove the dedicated hangcheck workqueue Chris Wilson
2016-06-25 10:12 ` [PATCH 08/25] drm/i915: Make queueing the hangcheck work inline Chris Wilson
2016-06-25 10:12 ` [PATCH 09/25] drm/i915: Separate GPU hang waitqueue from advance Chris Wilson
2016-06-25 10:12 ` [PATCH 10/25] drm/i915: Slaughter the thundering i915_wait_request herd Chris Wilson
2016-06-25 10:12 ` [PATCH 11/25] drm/i915: Spin after waking up for an interrupt Chris Wilson
2016-06-27 10:32   ` Tvrtko Ursulin [this message]
2016-06-28  8:55     ` Chris Wilson
2016-06-28  9:17       ` Chris Wilson
2016-06-28  9:25         ` Tvrtko Ursulin
2016-06-25 10:12 ` [PATCH 12/25] drm/i915: Use HWS for seqno tracking everywhere Chris Wilson
2016-06-25 10:12 ` [PATCH 13/25] drm/i915: Stop mapping the scratch page into CPU space Chris Wilson
2016-06-25 10:12 ` [PATCH 14/25] drm/i915: Allocate scratch page from stolen Chris Wilson
2016-06-25 10:12 ` [PATCH 15/25] drm/i915: Refactor scratch object allocation for gen2 w/a buffer Chris Wilson
2016-06-25 10:12 ` [PATCH 16/25] drm/i915: Add a delay between interrupt and inspecting the final seqno (ilk) Chris Wilson
2016-06-25 10:13 ` [PATCH 17/25] drm/i915: Check the CPU cached value in HWS of seqno after waking the waiter Chris Wilson
2016-06-25 10:13 ` [PATCH 18/25] drm/i915: Only apply one barrier after a breadcrumb interrupt is posted Chris Wilson
2016-06-27 10:35   ` Tvrtko Ursulin
2016-06-25 10:13 ` [PATCH 19/25] drm/i915: Stop setting wraparound seqno on initialisation Chris Wilson
2016-06-25 10:13 ` [PATCH 20/25] drm/i915: Only query timestamp when measuring elapsed time Chris Wilson
2016-06-27 10:37   ` Tvrtko Ursulin
2016-06-25 10:13 ` [PATCH 21/25] drm/i915: Convert trace-irq to the breadcrumb waiter Chris Wilson
2016-06-27 11:38   ` Tvrtko Ursulin
2016-06-28  8:49     ` Chris Wilson
2016-06-25 10:13 ` [PATCH 22/25] drm/i915: Embed signaling node into the GEM request Chris Wilson
2016-06-27 11:54   ` Tvrtko Ursulin
2016-06-25 10:13 ` [PATCH 23/25] drm/i915: Move the get/put irq locking into the caller Chris Wilson
2016-06-27 12:11   ` Tvrtko Ursulin
2016-06-28  8:42     ` Chris Wilson
2016-06-25 10:13 ` [PATCH 24/25] drm/i915: Simplify enabling user-interrupts with L3-remapping Chris Wilson
2016-06-25 10:13 ` [PATCH 25/25] drm/i915: Remove debug noise on detecting fault-injection of missed interrupts Chris Wilson
2016-06-25 10:43 ` ✗ Ro.CI.BAT: warning for series starting with [01/25] drm/i915: Preserve current RPS frequency across init Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=577100AA.1020204@linux.intel.com \
    --to=tvrtko.ursulin@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox