Re: [PATCH 1/2] drm/i915: Pull the unconditional GPU cache invalidation into request construction

From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 1/2] drm/i915: Pull the unconditional GPU cache invalidation into request construction
Date: Mon, 20 Nov 2017 17:26:57 +0200	[thread overview]
Message-ID: <87wp2l3r9a.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <20171120102002.22254-1-chris@chris-wilson.co.uk>

Chris Wilson <chris@chris-wilson.co.uk> writes:

> As the request now may implicitly invoke a context-switch, we should
> follow that with a GPU TLB invalidation. Also even before using GGTT, we

s/follow/preample? As it is the context-switch you are preampling.

Also point that the request allocation will invote a context-switch
but only after next patch.

So with some confusion untangled on these first lines,

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> should invalidate the TLBs for any updates (as well as the ppgtt
> invalidates that are unconditionally applied by execbuf). Since we
> almost always require the TLB invalidate, do it unconditionally on
> request allocation and so we can remove it from all other paths.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c        |  7 +------
>  drivers/gpu/drm/i915/i915_gem_render_state.c      |  4 ----
>  drivers/gpu/drm/i915/i915_gem_request.c           | 24 ++++++++++++++++++-----
>  drivers/gpu/drm/i915/selftests/huge_pages.c       |  4 ----
>  drivers/gpu/drm/i915/selftests/i915_gem_context.c |  4 ----
>  drivers/gpu/drm/i915/selftests/i915_gem_request.c | 10 ----------
>  drivers/gpu/drm/i915/selftests/intel_hangcheck.c  |  4 ----
>  7 files changed, 20 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 53ccb27bfe91..b7895788bc75 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1111,10 +1111,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>  	if (err)
>  		goto err_request;
>  
> -	err = eb->engine->emit_flush(rq, EMIT_INVALIDATE);
> -	if (err)
> -		goto err_request;
> -
>  	err = i915_switch_context(rq);
>  	if (err)
>  		goto err_request;
> @@ -1818,8 +1814,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
>  	/* Unconditionally flush any chipset caches (for streaming writes). */
>  	i915_gem_chipset_flush(eb->i915);
>  
> -	/* Unconditionally invalidate GPU caches and TLBs. */
> -	return eb->engine->emit_flush(eb->request, EMIT_INVALIDATE);
> +	return 0;
>  }
>  
>  static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index c2723a06fbb4..f7fc0df251ac 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -208,10 +208,6 @@ int i915_gem_render_state_emit(struct drm_i915_gem_request *rq)
>  	if (err)
>  		goto err_unpin;
>  
> -	err = engine->emit_flush(rq, EMIT_INVALIDATE);
> -	if (err)
> -		goto err_unpin;
> -
>  	err = engine->emit_bb_start(rq,
>  				    so.batch_offset, so.batch_size,
>  				    I915_DISPATCH_SECURE);
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index e0d6221022a8..91eae1b20c42 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -703,17 +703,31 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>  	req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
>  	GEM_BUG_ON(req->reserved_space < engine->emit_breadcrumb_sz);
>  
> -	ret = engine->request_alloc(req);
> -	if (ret)
> -		goto err_ctx;
> -
> -	/* Record the position of the start of the request so that
> +	/*
> +	 * Record the position of the start of the request so that
>  	 * should we detect the updated seqno part-way through the
>  	 * GPU processing the request, we never over-estimate the
>  	 * position of the head.
>  	 */
>  	req->head = req->ring->emit;
>  
> +	/* Unconditionally invalidate GPU caches and TLBs. */
> +	ret = engine->emit_flush(req, EMIT_INVALIDATE);
> +	if (ret)
> +		goto err_ctx;
> +
> +	ret = engine->request_alloc(req);
> +	if (ret) {
> +		/*
> +		 * Past the point-of-no-return. Since we may have updated
> +		 * global state after partially completing the request alloc,
> +		 * we need to commit any commands so far emitted in the
> +		 * request to the HW.
> +		 */
> +		__i915_add_request(req, false);
> +		return ERR_PTR(ret);
> +	}
> +
>  	/* Check that we didn't interrupt ourselves with a new request */
>  	GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
>  	return req;
> diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
> index 01af540b6ef9..159a2cb68765 100644
> --- a/drivers/gpu/drm/i915/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
> @@ -989,10 +989,6 @@ static int gpu_write(struct i915_vma *vma,
>  	i915_vma_unpin(batch);
>  	i915_vma_close(batch);
>  
> -	err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
> -	if (err)
> -		goto err_request;
> -
>  	err = i915_switch_context(rq);
>  	if (err)
>  		goto err_request;
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index c82780a9d455..4ff30b9af1fe 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -158,10 +158,6 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
>  		goto err_batch;
>  	}
>  
> -	err = engine->emit_flush(rq, EMIT_INVALIDATE);
> -	if (err)
> -		goto err_request;
> -
>  	err = i915_switch_context(rq);
>  	if (err)
>  		goto err_request;
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
> index 6bce99050e94..d7bf53ff8f84 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
> @@ -459,10 +459,6 @@ empty_request(struct intel_engine_cs *engine,
>  	if (IS_ERR(request))
>  		return request;
>  
> -	err = engine->emit_flush(request, EMIT_INVALIDATE);
> -	if (err)
> -		goto out_request;
> -
>  	err = i915_switch_context(request);
>  	if (err)
>  		goto out_request;
> @@ -675,9 +671,6 @@ static int live_all_engines(void *arg)
>  			goto out_request;
>  		}
>  
> -		err = engine->emit_flush(request[id], EMIT_INVALIDATE);
> -		GEM_BUG_ON(err);
> -
>  		err = i915_switch_context(request[id]);
>  		GEM_BUG_ON(err);
>  
> @@ -797,9 +790,6 @@ static int live_sequential_engines(void *arg)
>  			}
>  		}
>  
> -		err = engine->emit_flush(request[id], EMIT_INVALIDATE);
> -		GEM_BUG_ON(err);
> -
>  		err = i915_switch_context(request[id]);
>  		GEM_BUG_ON(err);
>  
> diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> index 71ce06680d66..145bdc26553c 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> @@ -114,10 +114,6 @@ static int emit_recurse_batch(struct hang *h,
>  	if (err)
>  		goto unpin_vma;
>  
> -	err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
> -	if (err)
> -		goto unpin_hws;
> -
>  	err = i915_switch_context(rq);
>  	if (err)
>  		goto unpin_hws;
> -- 
> 2.15.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx