public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
From: Daniel Vetter <daniel@ffwll.ch>
To: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>,
	intel-gfx@lists.freedesktop.org,
	Ben Widawsky <benjamin.widawsky@intel.com>
Subject: Re: [PATCH] drm/i915: Force CPU relocations if not GTT mapped
Date: Mon, 11 Aug 2014 12:02:36 +0200	[thread overview]
Message-ID: <20140811100236.GH8727@phenom.ffwll.local> (raw)
In-Reply-To: <1407654417-14266-1-git-send-email-chris@chris-wilson.co.uk>

On Sun, Aug 10, 2014 at 08:06:57AM +0100, Chris Wilson wrote:
> Move the decision on whether we need to have a mappable object during
> execbuffer to the fore and then reuse that decision by propagating the
> flag through to reservation. As a corollary, before doing the actual
> relocation through the GTT, we can make sure that we do have a GTT
> mapping through which to operate.
> 
> v2: Revamp and resend to ease future patches.
> v3: Refresh patch rationale
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=81094
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>

Ok, the secure batch hunk in here looks rather unrelated and imo also a
bit incomplete. I've dropped it. And I've added a bit of text to the
commit message to explain why this patch touches map_and_fenceable logic.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_gem.c            |  8 +--
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 80 ++++++++++++++----------------
>  2 files changed, 42 insertions(+), 46 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 99250d27668d..6366230c4e32 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2930,9 +2930,8 @@ int i915_vma_unbind(struct i915_vma *vma)
>  	vma->unbind_vma(vma);
>  
>  	list_del_init(&vma->mm_list);
> -	/* Avoid an unnecessary call to unbind on rebind. */
>  	if (i915_is_ggtt(vma->vm))
> -		obj->map_and_fenceable = true;
> +		obj->map_and_fenceable = false;
>  
>  	drm_mm_remove_node(&vma->node);
>  	i915_gem_vma_destroy(vma);
> @@ -3284,6 +3283,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
>  			return 0;
>  		}
>  	} else if (enable) {
> +		if (WARN_ON(!obj->map_and_fenceable))
> +			return -EINVAL;
> +
>  		reg = i915_find_fence_reg(dev);
>  		if (IS_ERR(reg))
>  			return PTR_ERR(reg);
> @@ -4333,8 +4335,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>  
>  	obj->fence_reg = I915_FENCE_REG_NONE;
>  	obj->madv = I915_MADV_WILLNEED;
> -	/* Avoid an unnecessary call to unbind on the first bind. */
> -	obj->map_and_fenceable = true;
>  
>  	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 60998fc4e5b2..8b734d5d16b4 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -35,6 +35,7 @@
>  
>  #define  __EXEC_OBJECT_HAS_PIN (1<<31)
>  #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
> +#define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
>  #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
>  
>  #define BATCH_OFFSET_BIAS (256*1024)
> @@ -535,14 +536,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
>  }
>  
>  static int
> -need_reloc_mappable(struct i915_vma *vma)
> -{
> -	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> -	return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
> -		i915_is_ggtt(vma->vm);
> -}
> -
> -static int
>  i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  				struct intel_engine_cs *ring,
>  				bool *need_reloc)
> @@ -550,19 +543,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  	struct drm_i915_gem_object *obj = vma->obj;
>  	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
>  	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
> -	bool need_fence;
>  	uint64_t flags;
>  	int ret;
>  
>  	flags = 0;
> -
> -	need_fence =
> -		has_fenced_gpu_access &&
> -		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
> -		obj->tiling_mode != I915_TILING_NONE;
> -	if (need_fence || need_reloc_mappable(vma))
> +	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
>  		flags |= PIN_MAPPABLE;
> -
>  	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
>  		flags |= PIN_GLOBAL;
>  	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
> @@ -601,26 +587,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  }
>  
>  static bool
> -eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
> +need_reloc_mappable(struct i915_vma *vma)
>  {
>  	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> -	struct drm_i915_gem_object *obj = vma->obj;
> -	bool need_fence, need_mappable;
>  
> -	need_fence =
> -		has_fenced_gpu_access &&
> -		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
> -		obj->tiling_mode != I915_TILING_NONE;
> -	need_mappable = need_fence || need_reloc_mappable(vma);
> +	if (entry->relocation_count == 0)
> +		return false;
>  
> -	WARN_ON((need_mappable || need_fence) &&
> +	if (!i915_is_ggtt(vma->vm))
> +		return false;
> +
> +	/* See also use_cpu_reloc() */
> +	if (HAS_LLC(vma->obj->base.dev))
> +		return false;
> +
> +	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
> +		return false;
> +
> +	return true;
> +}
> +
> +static bool
> +eb_vma_misplaced(struct i915_vma *vma)
> +{
> +	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> +	struct drm_i915_gem_object *obj = vma->obj;
> +
> +	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
>  	       !i915_is_ggtt(vma->vm));
>  
>  	if (entry->alignment &&
>  	    vma->node.start & (entry->alignment - 1))
>  		return true;
>  
> -	if (need_mappable && !obj->map_and_fenceable)
> +	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
>  		return true;
>  
>  	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
> @@ -664,9 +664,10 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>  			obj->tiling_mode != I915_TILING_NONE;
>  		need_mappable = need_fence || need_reloc_mappable(vma);
>  
> -		if (need_mappable)
> +		if (need_mappable) {
> +			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
>  			list_move(&vma->exec_list, &ordered_vmas);
> -		else
> +		} else
>  			list_move_tail(&vma->exec_list, &ordered_vmas);
>  
>  		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
> @@ -696,7 +697,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>  			if (!drm_mm_node_allocated(&vma->node))
>  				continue;
>  
> -			if (eb_vma_misplaced(vma, has_fenced_gpu_access))
> +			if (eb_vma_misplaced(vma))
>  				ret = i915_vma_unbind(vma);
>  			else
>  				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
> @@ -1386,25 +1387,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
>  	 * batch" bit. Hence we need to pin secure batches into the global gtt.
>  	 * hsw should have this fixed, but bdw mucks it up again. */
> -	if (flags & I915_DISPATCH_SECURE &&
> -	    !batch_obj->has_global_gtt_mapping) {
> -		/* When we have multiple VMs, we'll need to make sure that we
> -		 * allocate space first */
> -		struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
> -		BUG_ON(!vma);
> -		vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
> -	}
> +	if (flags & I915_DISPATCH_SECURE) {
> +		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
> +		if (ret)
> +			goto err;
>  
> -	if (flags & I915_DISPATCH_SECURE)
>  		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
> -	else
> +	} else
>  		exec_start += i915_gem_obj_offset(batch_obj, vm);
>  
>  	ret = legacy_ringbuffer_submission(dev, file, ring, ctx,
> -			args, &eb->vmas, batch_obj, exec_start, flags);
> -	if (ret)
> -		goto err;
> +					   args, &eb->vmas, batch_obj, exec_start, flags);
>  
> +	if (flags & I915_DISPATCH_SECURE)
> +		i915_gem_object_ggtt_unpin(batch_obj);
>  err:
>  	/* the request owns the ref now */
>  	i915_gem_context_unreference(ctx);
> -- 
> 2.1.0.rc1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

  reply	other threads:[~2014-08-11 10:02 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-08-09 16:37 [PATCH 1/3] drm/i915: Only perform set-to-gtt domain for objects bound to the global gtt Chris Wilson
2014-08-09 16:37 ` [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt Chris Wilson
2014-08-09 19:29   ` Ben Widawsky
2014-08-10  6:55     ` Chris Wilson
2014-08-10  7:04       ` Chris Wilson
2014-08-10  7:06     ` [PATCH] drm/i915: Force CPU relocations if not GTT mapped Chris Wilson
2014-08-11 10:02       ` Daniel Vetter [this message]
2014-08-09 16:37 ` [PATCH 3/3] drm/i915: Remove fenced_gpu_access and pending_fenced_gpu_access Chris Wilson
2014-08-11 10:20   ` Daniel Vetter
2014-08-11  9:36 ` [PATCH 1/3] drm/i915: Only perform set-to-gtt domain for objects bound to the global gtt Daniel Vetter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140811100236.GH8727@phenom.ffwll.local \
    --to=daniel@ffwll.ch \
    --cc=benjamin.widawsky@intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=daniel.vetter@ffwll.ch \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox