public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
From: Ben Widawsky <benjamin.widawsky@intel.com>
To: Intel GFX <intel-gfx@lists.freedesktop.org>
Cc: Ben Widawsky <ben@bwidawsk.net>,
	Ben Widawsky <benjamin.widawsky@intel.com>
Subject: [PATCH 3/6] drm/i915: Opportunistically reduce flushing at execbuf
Date: Mon,  9 Feb 2015 13:54:16 -0800	[thread overview]
Message-ID: <1423518859-6199-4-git-send-email-benjamin.widawsky@intel.com> (raw)
In-Reply-To: <1423518859-6199-1-git-send-email-benjamin.widawsky@intel.com>

If we're moving a bunch of buffers from the CPU domain to the GPU domain, and
we've already blown out the entire cache via a wbinvd, there is nothing more to
do.

With this and the previous patches, I am seeing a 3x FPS increase on a certain
benchmark which uses a giant 2d array texture. Unless I missed something in the
code, it should only affect non-LLC i915 platforms.

I haven't yet run any numbers for other benchmarks, nor have I attempted to
check if various conformance tests still pass.

v2: Rewrite the patch to be i915 only
Determine up front whether or not we will wbinvd.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h            |  8 ++++++++
 drivers/gpu/drm/i915/i915_gem.c            | 11 +++++------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 20 ++++++++++++++++----
 drivers/gpu/drm/i915/intel_lrc.c           | 10 ++++++++--
 4 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 90ff6aa..5d2f62d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1643,6 +1643,7 @@ struct i915_workarounds {
 
 struct eb_vmas {
 	struct list_head vmas;
+	bool do_wbinvd;
 	int and;
 	union {
 		struct i915_vma *lut[0];
@@ -1913,6 +1914,8 @@ struct drm_i915_private {
 		void (*stop_ring)(struct intel_engine_cs *ring);
 	} gt;
 
+	size_t wbinvd_threshold;
+
 	uint32_t request_uniq;
 
 	/*
@@ -2810,6 +2813,11 @@ static inline bool i915_stop_ring_allow_warn(struct drm_i915_private *dev_priv)
 
 void i915_gem_reset(struct drm_device *dev);
 bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
+static inline bool cpu_cache_is_coherent(struct drm_device *dev,
+					 enum i915_cache_level level)
+{
+	return HAS_LLC(dev) || level != I915_CACHE_NONE;
+}
 int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_init(struct drm_device *dev);
 int i915_gem_init_rings(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fc81889..5bfb332 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -61,12 +61,6 @@ static int i915_gem_shrinker_oom(struct notifier_block *nb,
 				 void *ptr);
 static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv);
 
-static bool cpu_cache_is_coherent(struct drm_device *dev,
-				  enum i915_cache_level level)
-{
-	return HAS_LLC(dev) || level != I915_CACHE_NONE;
-}
-
 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
 	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
@@ -4878,6 +4872,11 @@ int i915_gem_init(struct drm_device *dev)
 		dev_priv->gt.stop_ring = intel_logical_ring_stop;
 	}
 
+	dev_priv->wbinvd_threshold = boot_cpu_data.x86_cache_size << 10;
+	/* Pick a high default in the unlikely case we got nothing */
+	if (!dev_priv->wbinvd_threshold)
+		dev_priv->wbinvd_threshold = (8 << 20);
+
 	ret = i915_gem_init_userptr(dev);
 	if (ret)
 		goto out_unlock;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 13ed13e..56f9268 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -50,7 +50,7 @@ eb_create(struct drm_i915_gem_execbuffer2 *args)
 		unsigned size = args->buffer_count;
 		size *= sizeof(struct i915_vma *);
 		size += sizeof(struct eb_vmas);
-		eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+		eb = kzalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
 	}
 
 	if (eb == NULL) {
@@ -78,6 +78,7 @@ eb_reset(struct eb_vmas *eb)
 {
 	if (eb->and >= 0)
 		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
+	eb->do_wbinvd = false;
 }
 
 static int
@@ -154,6 +155,11 @@ eb_lookup_vmas(struct eb_vmas *eb,
 			hlist_add_head(&vma->exec_node,
 				       &eb->buckets[handle & eb->and]);
 		}
+
+		if (vma->node.size >= to_i915(obj->base.dev)->wbinvd_threshold &&
+		    obj->base.write_domain & I915_GEM_DOMAIN_CPU &&
+		    !cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+			eb->do_wbinvd = true;
 		++i;
 	}
 
@@ -826,7 +832,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
 	struct list_head *vmas = &eb->vmas;
 	struct i915_vma *vma;
 	uint32_t flush_domains = 0;
-	bool flush_chipset = false;
+	bool flush_chipset = eb->do_wbinvd;
 	int ret;
 
 	list_for_each_entry(vma, vmas, exec_list) {
@@ -835,12 +841,18 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
 		if (ret)
 			return ret;
 
+		flush_domains |= obj->base.write_domain;
+
+		if (eb->do_wbinvd)
+			continue;
+
 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
 			flush_chipset |= i915_gem_clflush_object(obj, false);
-
-		flush_domains |= obj->base.write_domain;
 	}
 
+	if (eb->do_wbinvd)
+		wbinvd();
+
 	if (flush_chipset)
 		i915_gem_chipset_flush(ring->dev);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 03741f9..16ca4a2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -586,12 +586,18 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
 		if (ret)
 			return ret;
 
+		flush_domains |= obj->base.write_domain;
+
+		if (eb->do_wbinvd)
+			continue;
+
 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
 			i915_gem_clflush_object(obj, false);
-
-		flush_domains |= obj->base.write_domain;
 	}
 
+	if (eb->do_wbinvd)
+		wbinvd();
+
 	if (flush_domains & I915_GEM_DOMAIN_GTT)
 		wmb();
 
-- 
2.3.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2015-02-09 21:54 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-02-09 21:54 [PATCH 0/6] Prefer wbinvd() where appropriate Ben Widawsky
2015-02-09 21:54 ` [PATCH 1/6] drm/i915: Remove the useless flush_chipset Ben Widawsky
2015-02-10  9:18   ` Chris Wilson
2015-02-09 21:54 ` [PATCH 2/6] drm/i915: Pass eb_vmas to execbuffer implementations Ben Widawsky
2015-02-10  9:19   ` Chris Wilson
2015-02-09 21:54 ` Ben Widawsky [this message]
2015-02-10  9:21   ` [PATCH 3/6] drm/i915: Opportunistically reduce flushing at execbuf Chris Wilson
2015-02-09 21:54 ` [PATCH 4/6] drm/i915: Add debugfs knobs for wbinvd threshold Ben Widawsky
2015-02-09 21:54 ` [PATCH 5/6] drm/i915: Extract checking the necessity of flush Ben Widawsky
2015-02-09 21:54 ` [PATCH 6/6] drm/i915: obey wbinvd threshold in more places Ben Widawsky
2015-02-10  9:28   ` Chris Wilson
2015-02-10 20:49   ` shuang.he

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1423518859-6199-4-git-send-email-benjamin.widawsky@intel.com \
    --to=benjamin.widawsky@intel.com \
    --cc=ben@bwidawsk.net \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox