public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
From: "Ville Syrjälä" <ville.syrjala@linux.intel.com>
To: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Intel GFX <intel-gfx@lists.freedesktop.org>,
	Ben Widawsky <ben@bwidawsk.net>,
	DRI Development <dri-devel@lists.freedesktop.org>
Subject: Re: [Intel-gfx] [PATCH 4/4] drm/i915: Opportunistically reduce flushing at execbuf
Date: Sun, 14 Dec 2014 15:12:21 +0200	[thread overview]
Message-ID: <20141214131221.GD10649@intel.com> (raw)
In-Reply-To: <1418526504-26316-5-git-send-email-benjamin.widawsky@intel.com>

On Sat, Dec 13, 2014 at 07:08:24PM -0800, Ben Widawsky wrote:
> If we're moving a bunch of buffers from the CPU domain to the GPU domain, and
> we've already blown out the entire cache via a wbinvd, there is nothing more to
> do.
> 
> With this and the previous patches, I am seeing a 3x FPS increase on a certain
> benchmark which uses a giant 2d array texture. Unless I missed something in the
> code, it should only effect non-LLC i915 platforms.
> 
> I haven't yet run any numbers for other benchmarks, nor have I attempted to
> check if various conformance tests still pass.
> 
> NOTE: As mentioned in the previous patch, if one can easily obtain the largest
> buffer and attempt to flush it first, the results would be even more desirable.

So even with that optimization if you only have tons of small buffers
that need to be flushed you'd still take the clflush path for every
single one.

How difficult would it to calculate the total size to be flushed first,
and then make the clflush vs. wbinvd decision base on that?

> 
> Cc: DRI Development <dri-devel@lists.freedesktop.org>
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> ---
>  drivers/gpu/drm/i915/i915_drv.h            |  3 ++-
>  drivers/gpu/drm/i915/i915_gem.c            | 12 +++++-------
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  8 +++++---
>  drivers/gpu/drm/i915/intel_lrc.c           |  8 +++++---
>  4 files changed, 17 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index d68c75f..fdb92a3 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2642,7 +2642,8 @@ static inline bool i915_stop_ring_allow_warn(struct drm_i915_private *dev_priv)
>  }
>  
>  void i915_gem_reset(struct drm_device *dev);
> -bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
> +enum drm_cache_flush
> +i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
>  int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
>  int __must_check i915_gem_init(struct drm_device *dev);
>  int i915_gem_init_rings(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index de241eb..3746738 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3608,7 +3608,7 @@ err_unpin:
>  	return vma;
>  }
>  
> -bool
> +enum drm_cache_flush
>  i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>  			bool force)
>  {
> @@ -3617,14 +3617,14 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>  	 * again at bind time.
>  	 */
>  	if (obj->pages == NULL)
> -		return false;
> +		return DRM_CACHE_FLUSH_NONE;
>  
>  	/*
>  	 * Stolen memory is always coherent with the GPU as it is explicitly
>  	 * marked as wc by the system, or the system is cache-coherent.
>  	 */
>  	if (obj->stolen || obj->phys_handle)
> -		return false;
> +		return DRM_CACHE_FLUSH_NONE;
>  
>  	/* If the GPU is snooping the contents of the CPU cache,
>  	 * we do not need to manually clear the CPU cache lines.  However,
> @@ -3635,12 +3635,10 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>  	 * tracking.
>  	 */
>  	if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
> -		return false;
> +		return DRM_CACHE_FLUSH_NONE;
>  
>  	trace_i915_gem_object_clflush(obj);
> -	drm_clflush_sg(obj->pages);
> -
> -	return true;
> +	return drm_clflush_sg(obj->pages);
>  }
>  
>  /** Flushes the GTT write domain for the object if it's dirty. */
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 0c25f62..e8eb9e9 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -827,7 +827,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
>  {
>  	struct i915_vma *vma;
>  	uint32_t flush_domains = 0;
> -	bool flush_chipset = false;
> +	enum drm_cache_flush flush_chipset = DRM_CACHE_FLUSH_NONE;
>  	int ret;
>  
>  	list_for_each_entry(vma, vmas, exec_list) {
> @@ -836,8 +836,10 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
>  		if (ret)
>  			return ret;
>  
> -		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
> -			flush_chipset |= i915_gem_clflush_object(obj, false);
> +		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU &&
> +		    flush_chipset != DRM_CACHE_FLUSH_WBINVD) {
> +			flush_chipset = i915_gem_clflush_object(obj, false);
> +		}
>  
>  		flush_domains |= obj->base.write_domain;
>  	}
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 89b5577..a6c6ebd 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -611,7 +611,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
>  	struct intel_engine_cs *ring = ringbuf->ring;
>  	struct i915_vma *vma;
>  	uint32_t flush_domains = 0;
> -	bool flush_chipset = false;
> +	enum drm_cache_flush flush_chipset = DRM_CACHE_FLUSH_NONE;
>  	int ret;
>  
>  	list_for_each_entry(vma, vmas, exec_list) {
> @@ -621,8 +621,10 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
>  		if (ret)
>  			return ret;
>  
> -		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
> -			flush_chipset |= i915_gem_clflush_object(obj, false);
> +		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU &&
> +		    flush_chipset != DRM_CACHE_FLUSH_WBINVD) {
> +			flush_chipset = i915_gem_clflush_object(obj, false);
> +		}
>  
>  		flush_domains |= obj->base.write_domain;
>  	}
> -- 
> 2.1.3
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

  parent reply	other threads:[~2014-12-14 13:12 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <1418526504-26316-1-git-send-email-benjamin.widawsky@intel.com>
2014-12-14  3:08 ` [PATCH 1/4] drm/cache: Use wbinvd helpers Ben Widawsky
2014-12-14 12:43   ` Chris Wilson
2014-12-15  7:49     ` Daniel Vetter
2014-12-15 20:26   ` [PATCH] [v2] " Ben Widawsky
2014-12-16  2:26     ` shuang.he
2014-12-16  7:56     ` Daniel Vetter
2014-12-14  3:08 ` [PATCH 2/4] drm/cache: Try to be smarter about clflushing on x86 Ben Widawsky
2014-12-14  4:15   ` Matt Turner
2014-12-15 22:07     ` Ben Widawsky
2014-12-14 12:59   ` [Intel-gfx] " Chris Wilson
2014-12-15  4:06     ` Jesse Barnes
2014-12-15 19:54       ` Ben Widawsky
2014-12-14  3:08 ` [PATCH 3/4] drm/cache: Return what type of cache flush occurred Ben Widawsky
2014-12-14  3:08 ` [PATCH 4/4] drm/i915: Opportunistically reduce flushing at execbuf Ben Widawsky
2014-12-14  9:35   ` shuang.he
2014-12-14 13:12   ` Ville Syrjälä [this message]
2014-12-14 23:37     ` Ben Widawsky
2014-12-15  7:55       ` [Intel-gfx] " Daniel Vetter
2014-12-15  8:20         ` Chris Wilson
2014-12-15 19:56           ` Ben Widawsky
2014-12-15 20:39             ` Chris Wilson
2014-12-15 21:06               ` [Intel-gfx] " Ben Widawsky
2014-12-16  7:57                 ` Chris Wilson
2014-12-16 19:38                   ` Ben Widawsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20141214131221.GD10649@intel.com \
    --to=ville.syrjala@linux.intel.com \
    --cc=ben@bwidawsk.net \
    --cc=benjamin.widawsky@intel.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox