All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jani Nikula <jani.nikula@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/5] drm/i915: Fallback to using CPU relocations	for large batch buffers
Date: Mon, 26 Jan 2015 10:57:17 +0200	[thread overview]
Message-ID: <87d261hqc2.fsf@intel.com> (raw)
In-Reply-To: <1421234459-21424-2-git-send-email-chris@chris-wilson.co.uk>

On Wed, 14 Jan 2015, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> If the batch buffer is too large to fit into the aperture and we need a
> GTT mapping for relocations, we currently fail. This only applies to a
> subset of machines for a subset of environments, quite undesirable. We
> can simply check after failing to insert the batch into the GTT as to
> whether we only need a mappable binding for relocation and, if so, we can
> revert to using a non-mappable binding and an alternate relocation
> method. However, using relocate_entry_cpu() is excruciatingly slow for
> large buffers on non-LLC as the entire buffer requires clflushing before
> and after the relocation handling. Alternatively, we can implement a
> third relocation method that only clflushes around the relocation entry.
> This is still slower than updating through the GTT, so we prefer using
> the GTT where possible, but is orders of magnitude faster as we
> typically do not have to then clflush the entire buffer.
>
> An alternative idea of using a temporary WC mapping of the backing store
> is promising (it should be faster than using the GTT itself), but
> requires fairly extensive arch/x86 support - along the lines of
> kmap_atomic_prof_pfn() (which is not universally implemented even for
> x86).
>
> Testcase: igt/gem_exec_big #pnv,byt
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88392

Tested-by: lu hua <huax.lu@intel.com>

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 84 +++++++++++++++++++++++++-----
>  1 file changed, 72 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index e3ef17783765..06bdf7670584 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -251,7 +251,6 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
>  {
>  	return (HAS_LLC(obj->base.dev) ||
>  		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
> -		!obj->map_and_fenceable ||
>  		obj->cache_level != I915_CACHE_NONE);
>  }
>  
> @@ -337,6 +336,51 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
>  	return 0;
>  }
>  
> +static void
> +clflush_write32(void *addr, uint32_t value)
> +{
> +	/* This is not a fast path, so KISS. */
> +	drm_clflush_virt_range(addr, sizeof(uint32_t));
> +	*(uint32_t *)addr = value;
> +	drm_clflush_virt_range(addr, sizeof(uint32_t));
> +}
> +
> +static int
> +relocate_entry_clflush(struct drm_i915_gem_object *obj,
> +		       struct drm_i915_gem_relocation_entry *reloc,
> +		       uint64_t target_offset)
> +{
> +	struct drm_device *dev = obj->base.dev;
> +	uint32_t page_offset = offset_in_page(reloc->offset);
> +	uint64_t delta = (int)reloc->delta + target_offset;
> +	char *vaddr;
> +	int ret;
> +
> +	ret = i915_gem_object_set_to_gtt_domain(obj, true);
> +	if (ret)
> +		return ret;
> +
> +	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
> +				reloc->offset >> PAGE_SHIFT));
> +	clflush_write32(vaddr + page_offset, lower_32_bits(delta));
> +
> +	if (INTEL_INFO(dev)->gen >= 8) {
> +		page_offset = offset_in_page(page_offset + sizeof(uint32_t));
> +
> +		if (page_offset == 0) {
> +			kunmap_atomic(vaddr);
> +			vaddr = kmap_atomic(i915_gem_object_get_page(obj,
> +			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
> +		}
> +
> +		clflush_write32(vaddr + page_offset, upper_32_bits(delta));
> +	}
> +
> +	kunmap_atomic(vaddr);
> +
> +	return 0;
> +}
> +
>  static int
>  i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
>  				   struct eb_vmas *eb,
> @@ -426,9 +470,12 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
>  
>  	if (use_cpu_reloc(obj))
>  		ret = relocate_entry_cpu(obj, reloc, target_offset);
> -	else
> +	else if (obj->map_and_fenceable)
>  		ret = relocate_entry_gtt(obj, reloc, target_offset);
> -
> +	else if (cpu_has_clflush)
> +		ret = relocate_entry_clflush(obj, reloc, target_offset);
> +	else
> +		ret = -ENODEV;
>  	if (ret)
>  		return ret;
>  
> @@ -525,6 +572,12 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
>  	return ret;
>  }
>  
> +static bool only_mappable_for_reloc(unsigned int flags)
> +{
> +	return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) ==
> +		__EXEC_OBJECT_NEEDS_MAP;
> +}
> +
>  static int
>  i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  				struct intel_engine_cs *ring,
> @@ -536,14 +589,21 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  	int ret;
>  
>  	flags = 0;
> -	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
> -		flags |= PIN_GLOBAL | PIN_MAPPABLE;
> -	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
> -		flags |= PIN_GLOBAL;
> -	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
> -		flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
> +	if (!drm_mm_node_allocated(&vma->node)) {
> +		if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
> +			flags |= PIN_GLOBAL | PIN_MAPPABLE;
> +		if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
> +			flags |= PIN_GLOBAL;
> +		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
> +			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
> +	}
>  
>  	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
> +	if ((ret == -ENOSPC  || ret == -E2BIG) &&
> +	    only_mappable_for_reloc(entry->flags))
> +		ret = i915_gem_object_pin(obj, vma->vm,
> +					  entry->alignment,
> +					  flags & ~(PIN_GLOBAL | PIN_MAPPABLE));
>  	if (ret)
>  		return ret;
>  
> @@ -605,13 +665,13 @@ eb_vma_misplaced(struct i915_vma *vma)
>  	    vma->node.start & (entry->alignment - 1))
>  		return true;
>  
> -	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
> -		return true;
> -
>  	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
>  	    vma->node.start < BATCH_OFFSET_BIAS)
>  		return true;
>  
> +	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
> +		return !only_mappable_for_reloc(entry->flags);
> +
>  	return false;
>  }
>  
> -- 
> 2.1.4
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Jani Nikula, Intel Open Source Technology Center
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2015-01-26  8:56 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-14 11:20 [PATCH 1/5] agp/intel: Serialise after GTT updates Chris Wilson
2015-01-14 11:20 ` [PATCH 2/5] drm/i915: Fallback to using CPU relocations for large batch buffers Chris Wilson
2015-01-15  9:45   ` Daniel, Thomas
2015-01-26  8:57   ` Jani Nikula [this message]
2015-01-27 15:09   ` Daniel Vetter
2015-01-27 21:43     ` Chris Wilson
2015-01-28  9:14       ` Daniel Vetter
2015-01-28  9:34         ` Chris Wilson
2015-01-14 11:20 ` [PATCH 3/5] drm/i915: Trim the command parser allocations Chris Wilson
2015-02-13 13:08   ` John Harrison
2015-02-13 13:23     ` Chris Wilson
2015-02-13 16:43       ` John Harrison
2015-02-23 16:09         ` Daniel Vetter
2015-01-14 11:20 ` [PATCH 4/5] drm/i915: Cache last obj->pages location for i915_gem_object_get_page() Chris Wilson
2015-02-13 13:33   ` John Harrison
2015-02-13 13:35   ` John Harrison
2015-02-13 14:28     ` Chris Wilson
2015-01-14 11:20 ` [PATCH 5/5] drm/i915: Tidy batch pool logic Chris Wilson
2015-01-14 20:54   ` shuang.he
2015-02-13 14:00   ` John Harrison
2015-02-13 14:57     ` Chris Wilson
2015-01-26 10:47 ` [PATCH v2] agp/intel: Serialise after GTT updates Chris Wilson
2015-01-27 14:58   ` Daniel Vetter
2015-01-27 21:44     ` Chris Wilson
2015-01-28  9:15       ` Daniel Vetter
2015-01-28  7:50   ` shuang.he
2015-02-06  0:11 ` [PATCH 1/5] " Jesse Barnes
2015-02-06  8:31   ` Chris Wilson
2015-02-06  8:32   ` Daniel Vetter
2015-02-13  8:59     ` Ville Syrjälä
2015-02-13  9:25       ` Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87d261hqc2.fsf@intel.com \
    --to=jani.nikula@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.