intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 07/21] drm/i915: Spin after waking up for an interrupt
Date: Mon, 6 Jun 2016 15:39:15 +0100	[thread overview]
Message-ID: <57558B13.9040008@linux.intel.com> (raw)
In-Reply-To: <1464970133-29859-8-git-send-email-chris@chris-wilson.co.uk>


On 03/06/16 17:08, Chris Wilson wrote:
> When waiting for an interrupt (waiting for the GPU to complete some
> work), we know we are the single waiter for the GPU. We also know when
> the GPU has nearly completed our request (or at least started processing
> it), so after being woken and we detect that the GPU is almost finished,

We cannot detect that the GPU is almost finished, only that it has started
processing the waited-on batch. I suggest rewording the commit message to be
accurate.

> allow the bottom-half to spin for a very short while to reduce client
> latencies.

Hm, in fact I think the commit message should explain that the patch is not
actually adding the busy spin for the bottom half, but extending it by 2us
for the first-waiter case.

>
> The impact is minimal, there was an improvement to the realtime-vs-many
> clients case, but exporting the function proves useful later.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c  |  2 +-
>   drivers/gpu/drm/i915/i915_drv.h      | 26 +++++++++++++++--------
>   drivers/gpu/drm/i915/i915_gem.c      | 40 +++++++++++++++++++++---------------
>   drivers/gpu/drm/i915/intel_display.c |  2 +-
>   drivers/gpu/drm/i915/intel_pm.c      |  4 ++--
>   5 files changed, 45 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 48683538b4e2..0c287bf0d230 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -663,7 +663,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
>   					   i915_gem_request_get_seqno(work->flip_queued_req),
>   					   dev_priv->next_seqno,
>   					   engine->get_seqno(engine),
> -					   i915_gem_request_completed(work->flip_queued_req, true));
> +					   i915_gem_request_completed(work->flip_queued_req));
>   			} else
>   				seq_printf(m, "Flip not associated with any ring\n");
>   			seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n",
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 68b383d98457..b0460eda2113 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3219,24 +3219,27 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
>   	return (int32_t)(seq1 - seq2) >= 0;
>   }
>
> -static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
> -					   bool lazy_coherency)
> +static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req)
>   {
> -	if (!lazy_coherency && req->engine->irq_seqno_barrier)
> -		req->engine->irq_seqno_barrier(req->engine);
>   	return i915_seqno_passed(req->engine->get_seqno(req->engine),
>   				 req->previous_seqno);
>   }
>
> -static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
> -					      bool lazy_coherency)
> +static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req)
>   {
> -	if (!lazy_coherency && req->engine->irq_seqno_barrier)
> -		req->engine->irq_seqno_barrier(req->engine);
>   	return i915_seqno_passed(req->engine->get_seqno(req->engine),
>   				 req->seqno);
>   }
>
> +bool __i915_spin_request(const struct drm_i915_gem_request *request,
> +			 int state, unsigned long timeout_us);
> +static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
> +				     int state, unsigned long timeout_us)
> +{
> +	return (i915_gem_request_started(request) &&
> +		__i915_spin_request(request, state, timeout_us));
> +}
> +
>   int __must_check i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno);
>   int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
>
> @@ -3913,6 +3916,8 @@ static inline void i915_trace_irq_get(struct intel_engine_cs *engine,
>
>   static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
>   {
> +	struct intel_engine_cs *engine = req->engine;
> +
>   	/* Ensure our read of the seqno is coherent so that we
>   	 * do not "miss an interrupt" (i.e. if this is the last
>   	 * request and the seqno write from the GPU is not visible
> @@ -3924,7 +3929,10 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
>   	 * but it is easier and safer to do it every time the waiter
>   	 * is woken.
>   	 */
> -	if (i915_gem_request_completed(req, false))
> +	if (engine->irq_seqno_barrier)
> +		engine->irq_seqno_barrier(engine);
> +
> +	if (i915_gem_request_completed(req))
>   		return true;
>
>   	/* We need to check whether any gpu reset happened in between
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index d08edb3d16f1..bf5c93f2bd81 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1155,9 +1155,9 @@ static bool busywait_stop(unsigned long timeout, unsigned cpu)
>   	return this_cpu != cpu;
>   }
>
> -static bool __i915_spin_request(struct drm_i915_gem_request *req, int state)
> +bool __i915_spin_request(const struct drm_i915_gem_request *req,
> +			 int state, unsigned long timeout_us)
>   {
> -	unsigned long timeout;
>   	unsigned cpu;
>
>   	/* When waiting for high frequency requests, e.g. during synchronous
> @@ -1170,19 +1170,15 @@ static bool __i915_spin_request(struct drm_i915_gem_request *req, int state)
>   	 * takes to sleep on a request, on the order of a microsecond.
>   	 */
>
> -	/* Only spin if we know the GPU is processing this request */
> -	if (!i915_gem_request_started(req, true))
> -		return false;
> -
> -	timeout = local_clock_us(&cpu) + 5;
> +	timeout_us += local_clock_us(&cpu);
>   	do {
> -		if (i915_gem_request_completed(req, true))
> +		if (i915_gem_request_completed(req))
>   			return true;
>
>   		if (signal_pending_state(state, current))
>   			break;
>
> -		if (busywait_stop(timeout, cpu))
> +		if (busywait_stop(timeout_us, cpu))
>   			break;
>
>   		cpu_relax_lowlatency();
> @@ -1224,7 +1220,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
>   	if (list_empty(&req->list))
>   		return 0;
>
> -	if (i915_gem_request_completed(req, true))
> +	if (i915_gem_request_completed(req))
>   		return 0;
>
>   	timeout_remain = MAX_SCHEDULE_TIMEOUT;
> @@ -1249,7 +1245,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
>   		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
>
>   	/* Optimistic spin for the next ~jiffie before touching IRQs */
> -	if (__i915_spin_request(req, state))
> +	if (i915_spin_request(req, state, 5))
>   		goto complete;
>
>   	intel_wait_init(&wait, req->seqno);
> @@ -1290,6 +1286,10 @@ wakeup:
>   		 */
>   		if (__i915_request_irq_complete(req))
>   			break;
> +
> +		/* Only spin if we know the GPU is processing this request */
> +		if (i915_spin_request(req, state, 2))
> +			break;

The behaviour described in the comment is embedded in the function being
called (i915_spin_request() itself checks i915_gem_request_started() before
spinning). Or change it to "Only spin*s* if we know.."?

>   	}
>   	remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
>
> @@ -2805,8 +2805,16 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
>   {
>   	struct drm_i915_gem_request *request;
>
> +	/* We are called by the error capture and reset at a random
> +	 * point in time. In particular, note that neither is crucially
> +	 * ordered with an interrupt. After a hang, the GPU is dead and we
> +	 * assume that no more writes can happen (we waited long enough for
> +	 * all writes that were in transaction to be flushed) - adding an
> +	 * extra delay for a recent interrupt is pointless. Hence, we do
> +	 * not need an engine->irq_seqno_barrier() before the seqno reads.
> +	 */
>   	list_for_each_entry(request, &engine->request_list, list) {
> -		if (i915_gem_request_completed(request, false))
> +		if (i915_gem_request_completed(request))
>   			continue;
>
>   		return request;
> @@ -2937,7 +2945,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
>   					   struct drm_i915_gem_request,
>   					   list);
>
> -		if (!i915_gem_request_completed(request, true))
> +		if (!i915_gem_request_completed(request))
>   			break;
>
>   		i915_gem_request_retire(request);
> @@ -2961,7 +2969,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
>   	}
>
>   	if (unlikely(engine->trace_irq_req &&
> -		     i915_gem_request_completed(engine->trace_irq_req, true))) {
> +		     i915_gem_request_completed(engine->trace_irq_req))) {
>   		engine->irq_put(engine);
>   		i915_gem_request_assign(&engine->trace_irq_req, NULL);
>   	}
> @@ -3058,7 +3066,7 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
>   		if (req == NULL)
>   			continue;
>
> -		if (i915_gem_request_completed(req, true))
> +		if (i915_gem_request_completed(req))
>   			i915_gem_object_retire__read(obj, i);
>   	}
>
> @@ -3164,7 +3172,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
>   	if (to == from)
>   		return 0;
>
> -	if (i915_gem_request_completed(from_req, true))
> +	if (i915_gem_request_completed(from_req))
>   		return 0;
>
>   	if (!i915_semaphore_is_enabled(to_i915(obj->base.dev))) {
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 2bc291ac7243..bb09ee6d1a3f 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -11590,7 +11590,7 @@ static bool __pageflip_stall_check_cs(struct drm_i915_private *dev_priv,
>   	vblank = intel_crtc_get_vblank_counter(intel_crtc);
>   	if (work->flip_ready_vblank == 0) {
>   		if (work->flip_queued_req &&
> -		    !i915_gem_request_completed(work->flip_queued_req, true))
> +		    !i915_gem_request_completed(work->flip_queued_req))
>   			return false;
>
>   		work->flip_ready_vblank = vblank;
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 657a64fc2780..712bd0debb91 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -7687,7 +7687,7 @@ static void __intel_rps_boost_work(struct work_struct *work)
>   	struct request_boost *boost = container_of(work, struct request_boost, work);
>   	struct drm_i915_gem_request *req = boost->req;
>
> -	if (!i915_gem_request_completed(req, true))
> +	if (!i915_gem_request_completed(req))
>   		gen6_rps_boost(req->i915, NULL, req->emitted_jiffies);
>
>   	i915_gem_request_unreference(req);
> @@ -7701,7 +7701,7 @@ void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req)
>   	if (req == NULL || INTEL_GEN(req->i915) < 6)
>   		return;
>
> -	if (i915_gem_request_completed(req, true))
> +	if (i915_gem_request_completed(req))
>   		return;
>
>   	boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
>

I am not such a big fan of spinning, but the code looks correct. Just 
please improve the commit message and that comment.

Regards,

Tvrtko


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2016-06-06 14:39 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-03 16:08 Breadcrumbs, again Chris Wilson
2016-06-03 16:08 ` [PATCH 01/21] drm/i915/shrinker: Flush active on objects before counting Chris Wilson
2016-06-03 16:08 ` [PATCH 02/21] drm/i915: Delay queuing hangcheck to wait-request Chris Wilson
2016-06-08  8:42   ` Daniel Vetter
2016-06-08  9:13     ` Chris Wilson
2016-06-03 16:08 ` [PATCH 03/21] drm/i915: Remove the dedicated hangcheck workqueue Chris Wilson
2016-06-06 12:52   ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 04/21] drm/i915: Make queueing the hangcheck work inline Chris Wilson
2016-06-03 16:08 ` [PATCH 05/21] drm/i915: Separate GPU hang waitqueue from advance Chris Wilson
2016-06-06 13:00   ` Tvrtko Ursulin
2016-06-07 12:11     ` Arun Siluvery
2016-06-03 16:08 ` [PATCH 06/21] drm/i915: Slaughter the thundering i915_wait_request herd Chris Wilson
2016-06-06 13:58   ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 07/21] drm/i915: Spin after waking up for an interrupt Chris Wilson
2016-06-06 14:39   ` Tvrtko Ursulin [this message]
2016-06-03 16:08 ` [PATCH 08/21] drm/i915: Use HWS for seqno tracking everywhere Chris Wilson
2016-06-06 14:55   ` Tvrtko Ursulin
2016-06-08  9:24     ` Chris Wilson
2016-06-03 16:08 ` [PATCH 09/21] drm/i915: Stop mapping the scratch page into CPU space Chris Wilson
2016-06-06 15:03   ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 10/21] drm/i915: Allocate scratch page from stolen Chris Wilson
2016-06-06 15:05   ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 11/21] drm/i915: Refactor scratch object allocation for gen2 w/a buffer Chris Wilson
2016-06-06 15:09   ` Tvrtko Ursulin
2016-06-08  9:27     ` Chris Wilson
2016-06-03 16:08 ` [PATCH 12/21] drm/i915: Add a delay between interrupt and inspecting the final seqno (ilk) Chris Wilson
2016-06-03 16:08 ` [PATCH 13/21] drm/i915: Check the CPU cached value of seqno after waking the waiter Chris Wilson
2016-06-06 15:10   ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 14/21] drm/i915: Only apply one barrier after a breadcrumb interrupt is posted Chris Wilson
2016-06-06 15:34   ` Tvrtko Ursulin
2016-06-08  9:35     ` Chris Wilson
2016-06-08  9:57       ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 15/21] drm/i915: Stop setting wraparound seqno on initialisation Chris Wilson
2016-06-08  8:54   ` Daniel Vetter
2016-06-03 16:08 ` [PATCH 16/21] drm/i915: Only query timestamp when measuring elapsed time Chris Wilson
2016-06-06 13:50   ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 17/21] drm/i915: Convert trace-irq to the breadcrumb waiter Chris Wilson
2016-06-07 12:04   ` Tvrtko Ursulin
2016-06-08  9:48     ` Chris Wilson
2016-06-08 10:16       ` Tvrtko Ursulin
2016-06-08 11:24         ` Chris Wilson
2016-06-08 11:47           ` Tvrtko Ursulin
2016-06-08 12:34             ` Chris Wilson
2016-06-08 12:44               ` Tvrtko Ursulin
2016-06-08 13:47                 ` Chris Wilson
2016-06-03 16:08 ` [PATCH 18/21] drm/i915: Embed signaling node into the GEM request Chris Wilson
2016-06-07 12:31   ` Tvrtko Ursulin
2016-06-08  9:54     ` Chris Wilson
2016-06-03 16:08 ` [PATCH 19/21] drm/i915: Move the get/put irq locking into the caller Chris Wilson
2016-06-07 12:46   ` Tvrtko Ursulin
2016-06-08 10:01     ` Chris Wilson
2016-06-08 10:18       ` Tvrtko Ursulin
2016-06-08 11:10         ` Chris Wilson
2016-06-08 11:49           ` Tvrtko Ursulin
2016-06-08 12:54             ` Chris Wilson
2016-06-03 16:08 ` [PATCH 20/21] drm/i915: Simplify enabling user-interrupts with L3-remapping Chris Wilson
2016-06-07 12:50   ` Tvrtko Ursulin
2016-06-03 16:08 ` [PATCH 21/21] drm/i915: Remove debug noise on detecting fault-injection of missed interrupts Chris Wilson
2016-06-07 12:51   ` Tvrtko Ursulin
2016-06-03 16:35 ` ✗ Ro.CI.BAT: failure for series starting with [01/21] drm/i915/shrinker: Flush active on objects before counting Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=57558B13.9040008@linux.intel.com \
    --to=tvrtko.ursulin@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).