intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 1/6] drm/i915: Remove change_domain tracepoint
@ 2017-02-22  9:41 Chris Wilson
  2017-02-22  9:41 ` [PATCH v3 2/6] drm/i915: Move cpu_cache_is_coherent() to header Chris Wilson
                   ` (6 more replies)
  0 siblings, 7 replies; 17+ messages in thread
From: Chris Wilson @ 2017-02-22  9:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

The change_domain tracepoint has been inaccurate for a few years - it
doesn't fully capture the domains, especially with userspace bypassing
them. It is defunct, misleading and time to be removed.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c            | 30 ------------------------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  3 ---
 drivers/gpu/drm/i915/i915_trace.h          | 24 ------------------------
 3 files changed, 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d93032875f28..9c5f091c771f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3214,9 +3214,6 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
 	intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT));
 
 	obj->base.write_domain = 0;
-	trace_i915_gem_object_change_domain(obj,
-					    obj->base.read_domains,
-					    I915_GEM_DOMAIN_GTT);
 }
 
 /** Flushes the CPU write domain for the object if it's dirty. */
@@ -3230,9 +3227,6 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
 
 	obj->base.write_domain = 0;
-	trace_i915_gem_object_change_domain(obj,
-					    obj->base.read_domains,
-					    I915_GEM_DOMAIN_CPU);
 }
 
 /**
@@ -3246,7 +3240,6 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
 int
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 {
-	uint32_t old_write_domain, old_read_domains;
 	int ret;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
@@ -3284,9 +3277,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
 		mb();
 
-	old_write_domain = obj->base.write_domain;
-	old_read_domains = obj->base.read_domains;
-
 	/* It should now be out of any other write domains, and we can update
 	 * the domain values for our changes.
 	 */
@@ -3298,10 +3288,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 		obj->mm.dirty = true;
 	}
 
-	trace_i915_gem_object_change_domain(obj,
-					    old_read_domains,
-					    old_write_domain);
-
 	i915_gem_object_unpin_pages(obj);
 	return 0;
 }
@@ -3538,7 +3524,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     const struct i915_ggtt_view *view)
 {
 	struct i915_vma *vma;
-	u32 old_read_domains, old_write_domain;
 	int ret;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
@@ -3603,19 +3588,12 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 		intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
 	}
 
-	old_write_domain = obj->base.write_domain;
-	old_read_domains = obj->base.read_domains;
-
 	/* It should now be out of any other write domains, and we can update
 	 * the domain values for our changes.
 	 */
 	obj->base.write_domain = 0;
 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
 
-	trace_i915_gem_object_change_domain(obj,
-					    old_read_domains,
-					    old_write_domain);
-
 	return vma;
 
 err_unpin_display:
@@ -3651,7 +3629,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
 int
 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 {
-	uint32_t old_write_domain, old_read_domains;
 	int ret;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
@@ -3670,9 +3647,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 
 	i915_gem_object_flush_gtt_write_domain(obj);
 
-	old_write_domain = obj->base.write_domain;
-	old_read_domains = obj->base.read_domains;
-
 	/* Flush the CPU cache if it's still invalid. */
 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
 		i915_gem_clflush_object(obj, false);
@@ -3693,10 +3667,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	}
 
-	trace_i915_gem_object_change_domain(obj,
-					    old_read_domains,
-					    old_write_domain);
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index dfed503301a6..6fb29832bc63 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1317,8 +1317,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
-		u32 old_read = obj->base.read_domains;
-		u32 old_write = obj->base.write_domain;
 
 		obj->base.write_domain = obj->base.pending_write_domain;
 		if (obj->base.write_domain)
@@ -1329,7 +1327,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 
 		i915_vma_move_to_active(vma, req, vma->exec_entry->flags);
 		eb_export_fence(obj, req, vma->exec_entry->flags);
-		trace_i915_gem_object_change_domain(obj, old_read, old_write);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 0c2ed1289bfc..2dbef3147a60 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -175,30 +175,6 @@ TRACE_EVENT(i915_vma_unbind,
 		      __entry->obj, __entry->offset, __entry->size, __entry->vm)
 );
 
-TRACE_EVENT(i915_gem_object_change_domain,
-	    TP_PROTO(struct drm_i915_gem_object *obj, u32 old_read, u32 old_write),
-	    TP_ARGS(obj, old_read, old_write),
-
-	    TP_STRUCT__entry(
-			     __field(struct drm_i915_gem_object *, obj)
-			     __field(u32, read_domains)
-			     __field(u32, write_domain)
-			     ),
-
-	    TP_fast_assign(
-			   __entry->obj = obj;
-			   __entry->read_domains = obj->base.read_domains | (old_read << 16);
-			   __entry->write_domain = obj->base.write_domain | (old_write << 16);
-			   ),
-
-	    TP_printk("obj=%p, read=%02x=>%02x, write=%02x=>%02x",
-		      __entry->obj,
-		      __entry->read_domains >> 16,
-		      __entry->read_domains & 0xffff,
-		      __entry->write_domain >> 16,
-		      __entry->write_domain & 0xffff)
-);
-
 TRACE_EVENT(i915_gem_object_pwrite,
 	    TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len),
 	    TP_ARGS(obj, offset, len),
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v3 2/6] drm/i915: Move cpu_cache_is_coherent() to header
  2017-02-22  9:41 [PATCH v3 1/6] drm/i915: Remove change_domain tracepoint Chris Wilson
@ 2017-02-22  9:41 ` Chris Wilson
  2017-02-22 11:02   ` Joonas Lahtinen
  2017-02-22  9:41 ` [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects Chris Wilson
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2017-02-22  9:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

For use in the next patch, take the current is-coherent helper and add
it to i915_gem_object.h

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h |  6 ++++++
 drivers/gpu/drm/i915/i915_gem.c | 20 ++++++--------------
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4e7046df1b5e..b0e451efb519 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -4111,4 +4111,10 @@ int remap_io_mapping(struct vm_area_struct *vma,
 		     unsigned long addr, unsigned long pfn, unsigned long size,
 		     struct io_mapping *iomap);
 
+static inline bool i915_gem_object_is_coherent(struct drm_i915_gem_object *obj)
+{
+	return (obj->cache_level != I915_CACHE_NONE ||
+		HAS_LLC(to_i915(obj->base.dev)));
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9c5f091c771f..312154b3d04c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -48,18 +48,12 @@ static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
 
-static bool cpu_cache_is_coherent(struct drm_device *dev,
-				  enum i915_cache_level level)
-{
-	return HAS_LLC(to_i915(dev)) || level != I915_CACHE_NONE;
-}
-
 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
 		return false;
 
-	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+	if (!i915_gem_object_is_coherent(obj))
 		return true;
 
 	return obj->pin_display;
@@ -255,7 +249,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 
 	if (needs_clflush &&
 	    (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
-	    !cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+	    !i915_gem_object_is_coherent(obj))
 		drm_clflush_sg(pages);
 
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
@@ -796,8 +790,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 	 * anyway again before the next pread happens.
 	 */
 	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
-		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
-							obj->cache_level);
+		*needs_clflush = !i915_gem_object_is_coherent(obj);
 
 	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		ret = i915_gem_object_set_to_cpu_domain(obj, false);
@@ -853,8 +846,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 	 * before writing.
 	 */
 	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
-		*needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
-							 obj->cache_level);
+		*needs_clflush |= !i915_gem_object_is_coherent(obj);
 
 	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		ret = i915_gem_object_set_to_cpu_domain(obj, true);
@@ -3173,7 +3165,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	 * snooping behaviour occurs naturally as the result of our domain
 	 * tracking.
 	 */
-	if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
+	if (!force && i915_gem_object_is_coherent(obj)) {
 		obj->cache_dirty = true;
 		return;
 	}
@@ -3412,7 +3404,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	}
 
 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU &&
-	    cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+	    i915_gem_object_is_coherent(obj))
 		obj->cache_dirty = true;
 
 	list_for_each_entry(vma, &obj->vma_list, obj_link)
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects
  2017-02-22  9:41 [PATCH v3 1/6] drm/i915: Remove change_domain tracepoint Chris Wilson
  2017-02-22  9:41 ` [PATCH v3 2/6] drm/i915: Move cpu_cache_is_coherent() to header Chris Wilson
@ 2017-02-22  9:41 ` Chris Wilson
  2017-02-22  9:54   ` Chris Wilson
  2017-02-22 10:57   ` Joonas Lahtinen
  2017-02-22  9:41 ` [PATCH v3 4/6] drm/i915: Skip clflushes for all non-page backed objects Chris Wilson
                   ` (4 subsequent siblings)
  6 siblings, 2 replies; 17+ messages in thread
From: Chris Wilson @ 2017-02-22  9:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c        | 41 +++++++++++++++++++++-------------
 drivers/gpu/drm/i915/i915_gem_object.h |  2 ++
 drivers/gpu/drm/i915/intel_display.c   |  9 ++------
 3 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 312154b3d04c..79ea585e82ac 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1613,23 +1613,16 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_sw_finish *args = data;
 	struct drm_i915_gem_object *obj;
-	int err = 0;
 
 	obj = i915_gem_object_lookup(file, args->handle);
 	if (!obj)
 		return -ENOENT;
 
 	/* Pinned buffers may be scanout, so flush the cache */
-	if (READ_ONCE(obj->pin_display)) {
-		err = i915_mutex_lock_interruptible(dev);
-		if (!err) {
-			i915_gem_object_flush_cpu_write_domain(obj);
-			mutex_unlock(&dev->struct_mutex);
-		}
-	}
-
+	i915_gem_object_flush_to_display(obj);
 	i915_gem_object_put(obj);
-	return err;
+
+	return 0;
 }
 
 /**
@@ -3221,6 +3214,27 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
 	obj->base.write_domain = 0;
 }
 
+static void __i915_gem_object_flush_to_display(struct drm_i915_gem_object *obj)
+{
+	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty)
+		return;
+
+	i915_gem_clflush_object(obj, true);
+	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+
+	obj->base.write_domain = 0;
+}
+
+void i915_gem_object_flush_to_display(struct drm_i915_gem_object *obj)
+{
+	if (!READ_ONCE(obj->pin_display))
+		return;
+
+	mutex_lock(&obj->base.dev->struct_mutex);
+	__i915_gem_object_flush_to_display(obj);
+	mutex_unlock(&obj->base.dev->struct_mutex);
+}
+
 /**
  * Moves a single object to the GTT read, and possibly write domain.
  * @obj: object to act on
@@ -3575,15 +3589,12 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
 
 	/* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
-	if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
-		i915_gem_clflush_object(obj, true);
-		intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
-	}
+	__i915_gem_object_flush_to_display(obj);
+	intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
 
 	/* It should now be out of any other write domains, and we can update
 	 * the domain values for our changes.
 	 */
-	obj->base.write_domain = 0;
 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
 
 	return vma;
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index ef4893a4f08c..2e4d80fb5204 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -364,5 +364,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
 	return engine;
 }
 
+void i915_gem_object_flush_to_display(struct drm_i915_gem_object *obj);
+
 #endif
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index f6feb93d4bb1..c43cc80d993b 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -14279,15 +14279,10 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb,
 					struct drm_clip_rect *clips,
 					unsigned num_clips)
 {
-	struct drm_device *dev = fb->dev;
-	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
-	struct drm_i915_gem_object *obj = intel_fb->obj;
+	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 
-	mutex_lock(&dev->struct_mutex);
-	if (obj->pin_display && obj->cache_dirty)
-		i915_gem_clflush_object(obj, true);
+	i915_gem_object_flush_to_display(obj);
 	intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
-	mutex_unlock(&dev->struct_mutex);
 
 	return 0;
 }
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v3 4/6] drm/i915: Skip clflushes for all non-page backed objects
  2017-02-22  9:41 [PATCH v3 1/6] drm/i915: Remove change_domain tracepoint Chris Wilson
  2017-02-22  9:41 ` [PATCH v3 2/6] drm/i915: Move cpu_cache_is_coherent() to header Chris Wilson
  2017-02-22  9:41 ` [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects Chris Wilson
@ 2017-02-22  9:41 ` Chris Wilson
  2017-02-22  9:41 ` [PATCH v3 5/6] drm/i915: Perform object clflushing asynchronously Chris Wilson
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 17+ messages in thread
From: Chris Wilson @ 2017-02-22  9:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Generalise the skip for physical and stolen objects by skipping anything
we do not have a valid address for inside the sg.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 79ea585e82ac..2cf49b5c7594 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3140,14 +3140,19 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	 * to GPU, and we can ignore the cache flush because it'll happen
 	 * again at bind time.
 	 */
-	if (!obj->mm.pages)
+	if (!obj->mm.pages) {
+		GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
 		return;
+	}
 
 	/*
 	 * Stolen memory is always coherent with the GPU as it is explicitly
 	 * marked as wc by the system, or the system is cache-coherent.
+	 * Similarly, we only access struct pages through the CPU cache, so
+	 * anything not backed by physical memory we consider to be always
+	 * coherent and not need clflushing.
 	 */
-	if (obj->stolen || obj->phys_handle)
+	if (!i915_gem_object_has_struct_page(obj))
 		return;
 
 	/* If the GPU is snooping the contents of the CPU cache,
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v3 5/6] drm/i915: Perform object clflushing asynchronously
  2017-02-22  9:41 [PATCH v3 1/6] drm/i915: Remove change_domain tracepoint Chris Wilson
                   ` (2 preceding siblings ...)
  2017-02-22  9:41 ` [PATCH v3 4/6] drm/i915: Skip clflushes for all non-page backed objects Chris Wilson
@ 2017-02-22  9:41 ` Chris Wilson
  2017-02-22 11:01   ` Joonas Lahtinen
  2017-02-22  9:41 ` [PATCH v3 6/6] drm/i915: Remove 'retire' parameter from intel_fb_obj_flush Chris Wilson
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2017-02-22  9:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Flushing the cachelines for an object is slow, can be as much as 100ms
for a large framebuffer. We currently do this under the struct_mutex BKL
on execution or on pageflip. But now with the ability to add fences to
obj->resv for both flips and execbuf (and we naturally wait on the fence
before CPU access), we can move the clflush operation to a workqueue and
signal a fence for completion, thereby doing the work asynchronously and
not blocking the driver or its clients.

v2: Introduce i915_gem_clflush.h and use a new name, split out some
extras into separate patches.

Suggested-by: Akash Goel <akash.goel@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/Makefile              |   1 +
 drivers/gpu/drm/i915/i915_drv.h            |   2 +-
 drivers/gpu/drm/i915/i915_gem.c            |  54 +--------
 drivers/gpu/drm/i915/i915_gem_clflush.c    | 189 +++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_clflush.h    |  37 ++++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   9 +-
 drivers/gpu/drm/i915/intel_display.c       |  44 ++++---
 7 files changed, 264 insertions(+), 72 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_clflush.c
 create mode 100644 drivers/gpu/drm/i915/i915_gem_clflush.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 1490d8622234..b1b580337c7a 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -29,6 +29,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
 # GEM code
 i915-y += i915_cmd_parser.o \
 	  i915_gem_batch_pool.o \
+	  i915_gem_clflush.o \
 	  i915_gem_context.o \
 	  i915_gem_dmabuf.o \
 	  i915_gem_evict.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b0e451efb519..efdf16143c46 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3389,7 +3389,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
 void i915_gem_reset(struct drm_i915_private *dev_priv);
 void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
-void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
+
 void i915_gem_init_mmio(struct drm_i915_private *i915);
 int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
 int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2cf49b5c7594..53c16bb4663e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -29,6 +29,7 @@
 #include <drm/drm_vma_manager.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
+#include "i915_gem_clflush.h"
 #include "i915_vgpu.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
@@ -3133,46 +3134,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
 	return 0;
 }
 
-void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
-			     bool force)
-{
-	/* If we don't have a page list set up, then we're not pinned
-	 * to GPU, and we can ignore the cache flush because it'll happen
-	 * again at bind time.
-	 */
-	if (!obj->mm.pages) {
-		GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
-		return;
-	}
-
-	/*
-	 * Stolen memory is always coherent with the GPU as it is explicitly
-	 * marked as wc by the system, or the system is cache-coherent.
-	 * Similarly, we only access struct pages through the CPU cache, so
-	 * anything not backed by physical memory we consider to be always
-	 * coherent and not need clflushing.
-	 */
-	if (!i915_gem_object_has_struct_page(obj))
-		return;
-
-	/* If the GPU is snooping the contents of the CPU cache,
-	 * we do not need to manually clear the CPU cache lines.  However,
-	 * the caches are only snooped when the render cache is
-	 * flushed/invalidated.  As we always have to emit invalidations
-	 * and flushes when moving into and out of the RENDER domain, correct
-	 * snooping behaviour occurs naturally as the result of our domain
-	 * tracking.
-	 */
-	if (!force && i915_gem_object_is_coherent(obj)) {
-		obj->cache_dirty = true;
-		return;
-	}
-
-	trace_i915_gem_object_clflush(obj);
-	drm_clflush_sg(obj->mm.pages);
-	obj->cache_dirty = false;
-}
-
 /** Flushes the GTT write domain for the object if it's dirty. */
 static void
 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
@@ -3213,9 +3174,7 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
 		return;
 
-	i915_gem_clflush_object(obj, obj->pin_display);
-	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
-
+	i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
 	obj->base.write_domain = 0;
 }
 
@@ -3224,9 +3183,7 @@ static void __i915_gem_object_flush_to_display(struct drm_i915_gem_object *obj)
 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty)
 		return;
 
-	i915_gem_clflush_object(obj, true);
-	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
-
+	i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
 	obj->base.write_domain = 0;
 }
 
@@ -3657,8 +3614,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 
 	/* Flush the CPU cache if it's still invalid. */
 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
-		i915_gem_clflush_object(obj, false);
-
+		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
 		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
 	}
 
@@ -4526,6 +4482,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
 
+	i915_gem_clflush_init(dev_priv);
+
 	if (!i915.enable_execlists) {
 		dev_priv->gt.resume = intel_legacy_submission_resume;
 		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c
new file mode 100644
index 000000000000..c5fee02f3173
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "i915_drv.h"
+#include "intel_frontbuffer.h"
+#include "i915_gem_clflush.h"
+
+static DEFINE_SPINLOCK(clflush_lock);
+static u64 clflush_context;
+
+struct clflush {
+	struct dma_fence dma; /* Must be first for dma_fence_free() */
+	struct i915_sw_fence wait;
+	struct work_struct work;
+	struct drm_i915_gem_object *obj;
+};
+
+static const char *i915_clflush_get_driver_name(struct dma_fence *fence)
+{
+	return DRIVER_NAME;
+}
+
+static const char *i915_clflush_get_timeline_name(struct dma_fence *fence)
+{
+	return "clflush";
+}
+
+static bool i915_clflush_enable_signaling(struct dma_fence *fence)
+{
+	return true;
+}
+
+static void i915_clflush_release(struct dma_fence *fence)
+{
+	struct clflush *clflush = container_of(fence, typeof(*clflush), dma);
+
+	i915_sw_fence_fini(&clflush->wait);
+
+	BUILD_BUG_ON(offsetof(typeof(*clflush), dma));
+	dma_fence_free(&clflush->dma);
+}
+
+static const struct dma_fence_ops i915_clflush_ops = {
+	.get_driver_name = i915_clflush_get_driver_name,
+	.get_timeline_name = i915_clflush_get_timeline_name,
+	.enable_signaling = i915_clflush_enable_signaling,
+	.wait = dma_fence_default_wait,
+	.release = i915_clflush_release,
+};
+
+static void __i915_do_clflush(struct drm_i915_gem_object *obj)
+{
+	drm_clflush_sg(obj->mm.pages);
+	obj->cache_dirty = false;
+
+	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+}
+
+static void i915_clflush_work(struct work_struct *work)
+{
+	struct clflush *clflush = container_of(work, typeof(*clflush), work);
+	struct drm_i915_gem_object *obj = clflush->obj;
+
+	if (!obj->cache_dirty)
+		goto out;
+
+	if (i915_gem_object_pin_pages(obj)) {
+		DRM_ERROR("Failed to acquire obj->pages for clflushing\n");
+		goto out;
+	}
+
+	__i915_do_clflush(obj);
+
+	i915_gem_object_unpin_pages(obj);
+
+out:
+	i915_gem_object_put(obj);
+
+	dma_fence_signal(&clflush->dma);
+	dma_fence_put(&clflush->dma);
+}
+
+static int __i915_sw_fence_call
+i915_clflush_notify(struct i915_sw_fence *fence,
+		    enum i915_sw_fence_notify state)
+{
+	struct clflush *clflush = container_of(fence, typeof(*clflush), wait);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		schedule_work(&clflush->work);
+		break;
+
+	case FENCE_FREE:
+		dma_fence_put(&clflush->dma);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+			     unsigned int flags)
+{
+	struct clflush *clflush;
+
+	/*
+	 * Stolen memory is always coherent with the GPU as it is explicitly
+	 * marked as wc by the system, or the system is cache-coherent.
+	 * Similarly, we only access struct pages through the CPU cache, so
+	 * anything not backed by physical memory we consider to be always
+	 * coherent and not need clflushing.
+	 */
+	if (!i915_gem_object_has_struct_page(obj))
+		return;
+
+	obj->cache_dirty = true;
+
+	/* If the GPU is snooping the contents of the CPU cache,
+	 * we do not need to manually clear the CPU cache lines.  However,
+	 * the caches are only snooped when the render cache is
+	 * flushed/invalidated.  As we always have to emit invalidations
+	 * and flushes when moving into and out of the RENDER domain, correct
+	 * snooping behaviour occurs naturally as the result of our domain
+	 * tracking.
+	 */
+	if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj))
+		return;
+
+	trace_i915_gem_object_clflush(obj);
+
+	clflush = NULL;
+	if (!(flags & I915_CLFLUSH_SYNC))
+		clflush = kmalloc(sizeof(*clflush), GFP_KERNEL);
+	if (clflush) {
+		dma_fence_init(&clflush->dma,
+			       &i915_clflush_ops,
+			       &clflush_lock,
+			       clflush_context,
+			       0);
+		i915_sw_fence_init(&clflush->wait, i915_clflush_notify);
+
+		clflush->obj = i915_gem_object_get(obj);
+		INIT_WORK(&clflush->work, i915_clflush_work);
+
+		dma_fence_get(&clflush->dma);
+
+		i915_sw_fence_await_reservation(&clflush->wait,
+						obj->resv, NULL,
+						false, I915_FENCE_TIMEOUT,
+						GFP_KERNEL);
+
+		reservation_object_lock(obj->resv, NULL);
+		reservation_object_add_excl_fence(obj->resv, &clflush->dma);
+		reservation_object_unlock(obj->resv);
+
+		i915_sw_fence_commit(&clflush->wait);
+	} else if (obj->mm.pages) {
+		__i915_do_clflush(obj);
+	} else {
+		GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
+	}
+}
+
+void i915_gem_clflush_init(struct drm_i915_private *i915)
+{
+	clflush_context = dma_fence_context_alloc(1);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.h b/drivers/gpu/drm/i915/i915_gem_clflush.h
new file mode 100644
index 000000000000..b62d61a2d15f
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __I915_GEM_CLFLUSH_H__
+#define __I915_GEM_CLFLUSH_H__
+
+struct drm_i915_private;
+struct drm_i915_gem_object;
+
+void i915_gem_clflush_init(struct drm_i915_private *i915);
+void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+			     unsigned int flags);
+#define I915_CLFLUSH_FORCE BIT(0)
+#define I915_CLFLUSH_SYNC BIT(1)
+
+#endif /* __I915_GEM_CLFLUSH_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 6fb29832bc63..35d2cb979452 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -35,6 +35,7 @@
 #include <drm/i915_drm.h>
 
 #include "i915_drv.h"
+#include "i915_gem_clflush.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
@@ -1114,13 +1115,15 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 		if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
 			continue;
 
+		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) {
+			i915_gem_clflush_object(obj, 0);
+			obj->base.write_domain = 0;
+		}
+
 		ret = i915_gem_request_await_object
 			(req, obj, obj->base.pending_write_domain);
 		if (ret)
 			return ret;
-
-		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
-			i915_gem_clflush_object(obj, false);
 	}
 
 	/* Unconditionally flush any chipset caches (for streaming writes). */
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index c43cc80d993b..2e6ec488d5a6 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -37,6 +37,7 @@
 #include "intel_frontbuffer.h"
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
+#include "i915_gem_clflush.h"
 #include "intel_dsi.h"
 #include "i915_trace.h"
 #include <drm/drm_atomic.h>
@@ -13189,6 +13190,29 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 	struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb);
 	int ret;
 
+	if (obj) {
+		if (plane->type == DRM_PLANE_TYPE_CURSOR &&
+		    INTEL_INFO(dev_priv)->cursor_needs_physical) {
+			const int align = IS_I830(dev_priv) ? 16 * 1024 : 256;
+
+			ret = i915_gem_object_attach_phys(obj, align);
+			if (ret) {
+				DRM_DEBUG_KMS("failed to attach phys object\n");
+				return ret;
+			}
+		} else {
+			struct i915_vma *vma;
+
+			vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
+			if (IS_ERR(vma)) {
+				DRM_DEBUG_KMS("failed to pin object\n");
+				return PTR_ERR(vma);
+			}
+
+			to_intel_plane_state(new_state)->vma = vma;
+		}
+	}
+
 	if (!obj && !old_obj)
 		return 0;
 
@@ -13241,26 +13265,6 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 		i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY);
 	}
 
-	if (plane->type == DRM_PLANE_TYPE_CURSOR &&
-	    INTEL_INFO(dev_priv)->cursor_needs_physical) {
-		int align = IS_I830(dev_priv) ? 16 * 1024 : 256;
-		ret = i915_gem_object_attach_phys(obj, align);
-		if (ret) {
-			DRM_DEBUG_KMS("failed to attach phys object\n");
-			return ret;
-		}
-	} else {
-		struct i915_vma *vma;
-
-		vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
-		if (IS_ERR(vma)) {
-			DRM_DEBUG_KMS("failed to pin object\n");
-			return PTR_ERR(vma);
-		}
-
-		to_intel_plane_state(new_state)->vma = vma;
-	}
-
 	return 0;
 }
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v3 6/6] drm/i915: Remove 'retire' parameter from intel_fb_obj_flush
  2017-02-22  9:41 [PATCH v3 1/6] drm/i915: Remove change_domain tracepoint Chris Wilson
                   ` (3 preceding siblings ...)
  2017-02-22  9:41 ` [PATCH v3 5/6] drm/i915: Perform object clflushing asynchronously Chris Wilson
@ 2017-02-22  9:41 ` Chris Wilson
  2017-02-22 11:04   ` Joonas Lahtinen
  2017-02-22 10:22 ` ✓ Fi.CI.BAT: success for series starting with [v3,1/6] drm/i915: Remove change_domain tracepoint Patchwork
  2017-02-22 10:30 ` [PATCH v3 1/6] " Joonas Lahtinen
  6 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2017-02-22  9:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Setting retire=true is identical to using origin=ORIGIN_CS, so make the
same simplification for intel_fb_obj_flush() as already employed for
intel_fb_obj_invalidate().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c          | 12 ++++++------
 drivers/gpu/drm/i915/i915_gem_clflush.c  |  2 +-
 drivers/gpu/drm/i915/intel_display.c     |  2 +-
 drivers/gpu/drm/i915/intel_frontbuffer.c |  3 +--
 drivers/gpu/drm/i915/intel_frontbuffer.h |  8 ++------
 5 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 53c16bb4663e..9beac37736b5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -628,7 +628,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 	drm_clflush_virt_range(vaddr, args->size);
 	i915_gem_chipset_flush(to_i915(obj->base.dev));
 
-	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+	intel_fb_obj_flush(obj, ORIGIN_CPU);
 	return 0;
 }
 
@@ -1275,7 +1275,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		user_data += page_length;
 		offset += page_length;
 	}
-	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+	intel_fb_obj_flush(obj, ORIGIN_CPU);
 
 	mutex_lock(&i915->drm.struct_mutex);
 out_unpin:
@@ -1411,7 +1411,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 		offset = 0;
 	}
 
-	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+	intel_fb_obj_flush(obj, ORIGIN_CPU);
 	i915_gem_obj_finish_shmem_access(obj);
 	return ret;
 }
@@ -3162,7 +3162,7 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
 	if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
 		POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
 
-	intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT));
+	intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT));
 
 	obj->base.write_domain = 0;
 }
@@ -3552,7 +3552,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 
 	/* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
 	__i915_gem_object_flush_to_display(obj);
-	intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
+	intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);
 
 	/* It should now be out of any other write domains, and we can update
 	 * the domain values for our changes.
@@ -3953,7 +3953,7 @@ frontbuffer_retire(struct i915_gem_active *active,
 	struct drm_i915_gem_object *obj =
 		container_of(active, typeof(*obj), frontbuffer_write);
 
-	intel_fb_obj_flush(obj, true, ORIGIN_CS);
+	intel_fb_obj_flush(obj, ORIGIN_CS);
 }
 
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c
index c5fee02f3173..d925fb582ba7 100644
--- a/drivers/gpu/drm/i915/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
@@ -74,7 +74,7 @@ static void __i915_do_clflush(struct drm_i915_gem_object *obj)
 	drm_clflush_sg(obj->mm.pages);
 	obj->cache_dirty = false;
 
-	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+	intel_fb_obj_flush(obj, ORIGIN_CPU);
 }
 
 static void i915_clflush_work(struct work_struct *work)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 2e6ec488d5a6..fb6ddaecd216 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -14286,7 +14286,7 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb,
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 
 	i915_gem_object_flush_to_display(obj);
-	intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
+	intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
index 966de4c7c7a2..fcfc217e754e 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
@@ -114,13 +114,12 @@ static void intel_frontbuffer_flush(struct drm_i915_private *dev_priv,
 }
 
 void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
-			  bool retire,
 			  enum fb_op_origin origin,
 			  unsigned int frontbuffer_bits)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 
-	if (retire) {
+	if (origin == ORIGIN_CS) {
 		spin_lock(&dev_priv->fb_tracking.lock);
 		/* Filter out new bits since rendering started. */
 		frontbuffer_bits &= dev_priv->fb_tracking.busy_bits;
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.h b/drivers/gpu/drm/i915/intel_frontbuffer.h
index 7bab41218cf7..63cd9a753a72 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.h
+++ b/drivers/gpu/drm/i915/intel_frontbuffer.h
@@ -38,7 +38,6 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
 			       enum fb_op_origin origin,
 			       unsigned int frontbuffer_bits);
 void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
-			  bool retire,
 			  enum fb_op_origin origin,
 			  unsigned int frontbuffer_bits);
 
@@ -69,15 +68,12 @@ static inline bool intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
 /**
  * intel_fb_obj_flush - flush frontbuffer object
  * @obj: GEM object to flush
- * @retire: set when retiring asynchronous rendering
  * @origin: which operation caused the flush
  *
  * This function gets called every time rendering on the given object has
- * completed and frontbuffer caching can be started again. If @retire is true
- * then any delayed flushes will be unblocked.
+ * completed and frontbuffer caching can be started again.
  */
 static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
-				      bool retire,
 				      enum fb_op_origin origin)
 {
 	unsigned int frontbuffer_bits;
@@ -86,7 +82,7 @@ static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
 	if (!frontbuffer_bits)
 		return;
 
-	__intel_fb_obj_flush(obj, retire, origin, frontbuffer_bits);
+	__intel_fb_obj_flush(obj, origin, frontbuffer_bits);
 }
 
 #endif /* __INTEL_FRONTBUFFER_H__ */
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects
  2017-02-22  9:41 ` [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects Chris Wilson
@ 2017-02-22  9:54   ` Chris Wilson
  2017-02-22 10:57   ` Joonas Lahtinen
  1 sibling, 0 replies; 17+ messages in thread
From: Chris Wilson @ 2017-02-22  9:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

On Wed, Feb 22, 2017 at 09:41:18AM +0000, Chris Wilson wrote:

Reason goes here.

We have three different paths by which userspace wants to flush the
display plane (i.e. objects with obj->pin_display). Use a common helper
to identify those paths and to simplify a later change.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [v3,1/6] drm/i915: Remove change_domain tracepoint
  2017-02-22  9:41 [PATCH v3 1/6] drm/i915: Remove change_domain tracepoint Chris Wilson
                   ` (4 preceding siblings ...)
  2017-02-22  9:41 ` [PATCH v3 6/6] drm/i915: Remove 'retire' parameter from intel_fb_obj_flush Chris Wilson
@ 2017-02-22 10:22 ` Patchwork
  2017-02-22 10:30 ` [PATCH v3 1/6] " Joonas Lahtinen
  6 siblings, 0 replies; 17+ messages in thread
From: Patchwork @ 2017-02-22 10:22 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v3,1/6] drm/i915: Remove change_domain tracepoint
URL   : https://patchwork.freedesktop.org/series/20047/
State : success

== Summary ==

Series 20047v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/20047/revisions/1/mbox/

fi-bdw-5557u     total:253  pass:242  dwarn:0   dfail:0   fail:0   skip:11 
fi-bsw-n3050     total:253  pass:214  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:253  pass:234  dwarn:0   dfail:0   fail:0   skip:19 
fi-bxt-t5700     total:83   pass:70   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:253  pass:226  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:253  pass:222  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:253  pass:237  dwarn:0   dfail:0   fail:0   skip:16 
fi-hsw-4770r     total:253  pass:237  dwarn:0   dfail:0   fail:0   skip:16 
fi-ilk-650       total:253  pass:203  dwarn:0   dfail:0   fail:0   skip:50 
fi-ivb-3520m     total:253  pass:235  dwarn:0   dfail:0   fail:0   skip:18 
fi-ivb-3770      total:253  pass:235  dwarn:0   dfail:0   fail:0   skip:18 
fi-kbl-7500u     total:253  pass:235  dwarn:0   dfail:0   fail:0   skip:18 
fi-skl-6260u     total:253  pass:243  dwarn:0   dfail:0   fail:0   skip:10 
fi-skl-6700hq    total:253  pass:236  dwarn:0   dfail:0   fail:0   skip:17 
fi-skl-6700k     total:253  pass:231  dwarn:4   dfail:0   fail:0   skip:18 
fi-skl-6770hq    total:253  pass:243  dwarn:0   dfail:0   fail:0   skip:10 
fi-snb-2520m     total:253  pass:225  dwarn:0   dfail:0   fail:0   skip:28 
fi-snb-2600      total:253  pass:224  dwarn:0   dfail:0   fail:0   skip:29 

e922d8924920be31fc76f5813bc1fde5d6cbf950 drm-tip: 2017y-02m-21d-16h-18m-14s UTC integration manifest
17c552e drm/i915: Remove 'retire' parameter from intel_fb_obj_flush
ca22004 drm/i915: Perform object clflushing asynchronously
c9d53f1 drm/i915: Skip clflushes for all non-page backed objects
13774c5 drm/i915: Amalgamate flushing of display objects
6d01261 drm/i915: Move cpu_cache_is_coherent() to header
2909fce drm/i915: Remove change_domain tracepoint

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3923/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 1/6] drm/i915: Remove change_domain tracepoint
  2017-02-22  9:41 [PATCH v3 1/6] drm/i915: Remove change_domain tracepoint Chris Wilson
                   ` (5 preceding siblings ...)
  2017-02-22 10:22 ` ✓ Fi.CI.BAT: success for series starting with [v3,1/6] drm/i915: Remove change_domain tracepoint Patchwork
@ 2017-02-22 10:30 ` Joonas Lahtinen
  6 siblings, 0 replies; 17+ messages in thread
From: Joonas Lahtinen @ 2017-02-22 10:30 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: matthew.auld

On ke, 2017-02-22 at 09:41 +0000, Chris Wilson wrote:
> The change_domain tracepoint has been inaccurate for a few years - it
> doesn't fully capture the domains, especially with userspace bypassing
> them. It is defunct, misleading and time to be removed.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects
  2017-02-22  9:41 ` [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects Chris Wilson
  2017-02-22  9:54   ` Chris Wilson
@ 2017-02-22 10:57   ` Joonas Lahtinen
  1 sibling, 0 replies; 17+ messages in thread
From: Joonas Lahtinen @ 2017-02-22 10:57 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: matthew.auld

Add the message from your reply.

On ke, 2017-02-22 at 09:41 +0000, Chris Wilson wrote:
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

<SNIP>

> @@ -1613,23 +1613,16 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
>  {
>  	struct drm_i915_gem_sw_finish *args = data;
>  	struct drm_i915_gem_object *obj;
> -	int err = 0;
>  
>  	obj = i915_gem_object_lookup(file, args->handle);
>  	if (!obj)
>  		return -ENOENT;
>  
>  	/* Pinned buffers may be scanout, so flush the cache */
> -	if (READ_ONCE(obj->pin_display)) {
> -		err = i915_mutex_lock_interruptible(dev);
> -		if (!err) {
> -			i915_gem_object_flush_cpu_write_domain(obj);
> -			mutex_unlock(&dev->struct_mutex);
> -		}
> -	}
> -
> +	i915_gem_object_flush_to_display(obj);

.._flush_if_display() and this is

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 5/6] drm/i915: Perform object clflushing asynchronously
  2017-02-22  9:41 ` [PATCH v3 5/6] drm/i915: Perform object clflushing asynchronously Chris Wilson
@ 2017-02-22 11:01   ` Joonas Lahtinen
  0 siblings, 0 replies; 17+ messages in thread
From: Joonas Lahtinen @ 2017-02-22 11:01 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: matthew.auld

On ke, 2017-02-22 at 09:41 +0000, Chris Wilson wrote:
> Flushing the cachelines for an object is slow, can be as much as 100ms
> for a large framebuffer. We currently do this under the struct_mutex BKL
> on execution or on pageflip. But now with the ability to add fences to
> obj->resv for both flips and execbuf (and we naturally wait on the fence
> before CPU access), we can move the clflush operation to a workqueue and
> signal a fence for completion, thereby doing the work asynchronously and
> not blocking the driver or its clients.
> 
> v2: Introduce i915_gem_clflush.h and use a new name, split out some
> extras into separate patches.
> 
> Suggested-by: Akash Goel <akash.goel@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Matthew Auld <matthew.auld@intel.com>

Reviewed-by: Joonas Lahtien <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 2/6] drm/i915: Move cpu_cache_is_coherent() to header
  2017-02-22  9:41 ` [PATCH v3 2/6] drm/i915: Move cpu_cache_is_coherent() to header Chris Wilson
@ 2017-02-22 11:02   ` Joonas Lahtinen
  0 siblings, 0 replies; 17+ messages in thread
From: Joonas Lahtinen @ 2017-02-22 11:02 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: matthew.auld

On ke, 2017-02-22 at 09:41 +0000, Chris Wilson wrote:
> For use in the next patch, take the current is-coherent helper and add
> it to i915_gem_object.h
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 6/6] drm/i915: Remove 'retire' parameter from intel_fb_obj_flush
  2017-02-22  9:41 ` [PATCH v3 6/6] drm/i915: Remove 'retire' parameter from intel_fb_obj_flush Chris Wilson
@ 2017-02-22 11:04   ` Joonas Lahtinen
  0 siblings, 0 replies; 17+ messages in thread
From: Joonas Lahtinen @ 2017-02-22 11:04 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: matthew.auld

On ke, 2017-02-22 at 09:41 +0000, Chris Wilson wrote:
> Setting retire=true is identical to using origin=ORIGIN_CS, so make the
> same simplification for intel_fb_obj_flush() as already employed for
> intel_fb_obj_invalidate().
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects
@ 2017-09-12 22:11 Yaroslav Shabalin
  2017-09-14 10:48 ` Yaroslav Shabalin
  2017-09-14 10:57 ` Chris Wilson
  0 siblings, 2 replies; 17+ messages in thread
From: Yaroslav Shabalin @ 2017-09-12 22:11 UTC (permalink / raw)
  To: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 2426 bytes --]

Hi!

I would like to report a problem which I believe is connected to changes
introduced in this patch (
https://lists.freedesktop.org/archives/intel-gfx/2017-February/120777.html).
Specifically the problem is: screen freezes for 0.5 - 5 seconds (mostly for
2-3 seconds) when PSR is enabled (i915.enable_psr=1 kernel boot option).
Freezes are more frequent with rich, dynamic screen content. The most
reliable way to reproduce it, I discovered so far, is to start Gnome "Disk
Usage Analyzer", analyze some catalog and move mouse pointer over color
rectangles for a while. Usually that way I get 3-5 freezes in a minute.
During normal PC usage freezes are happening less frequently, but still
annoyingly. For the first time this problem appeared after update to kernel
4.12 and still there in recent 4.13 too.
I've looked into it a little and seems that mentioned patch is the one that
brought it in. Git bisect indicates it as the first bad commit. After
realizing that I tried to partly revert changes in the patch and found that
following change:

 	/* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
-	if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
-		i915_gem_clflush_object(obj, true);
-		intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
-	}
+	__i915_gem_object_flush_to_display(obj);
+	intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);

when reverted solves the problem completely. One of the main
difference is that in old version intel_fb_obj_flush was called
conditionally and now it is called every time. So I surrounded the
call with "if" just to check and it worked:

/* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
-	if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
-		i915_gem_clflush_object(obj, true);
-		intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
-	}
+	__i915_gem_object_flush_to_display(obj);
+	if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
+	  intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
+ }

 The above code was just a guess based on some logical insight.
Although it eliminates the freezes, I don't really know if the change
in calling intel_fb_obj_flush wasn't made intentionally. So could you
please look at that case?

If you need any help in reproducing problem conditions or further
diagnostic information, please let me know.


Thank you in advance,

Yaroslav Shabalin

[-- Attachment #1.2: Type: text/html, Size: 2904 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects
  2017-09-12 22:11 [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects Yaroslav Shabalin
@ 2017-09-14 10:48 ` Yaroslav Shabalin
  2017-09-14 10:57 ` Chris Wilson
  1 sibling, 0 replies; 17+ messages in thread
From: Yaroslav Shabalin @ 2017-09-14 10:48 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 2709 bytes --]

Dear Chris,

As you are the author of original patch, could you please take a look at my
report below.

On Wed, Sep 13, 2017 at 1:11 AM, Yaroslav Shabalin <
yaroslav.shabalin@gmail.com> wrote:

> Hi!
>
> I would like to report a problem which I believe is connected to changes
> introduced in this patch (https://lists.freedesktop.org
> /archives/intel-gfx/2017-February/120777.html). Specifically the problem
> is: screen freezes for 0.5 - 5 seconds (mostly for 2-3 seconds) when PSR is
> enabled (i915.enable_psr=1 kernel boot option). Freezes are more frequent
> with rich, dynamic screen content. The most reliable way to reproduce it, I
> discovered so far, is to start Gnome "Disk Usage Analyzer", analyze some
> catalog and move mouse pointer over color rectangles for a while. Usually
> that way I get 3-5 freezes in a minute. During normal PC usage freezes are
> happening less frequently, but still annoyingly. For the first time this
> problem appeared after update to kernel 4.12 and still there in recent 4.13
> too.
> I've looked into it a little and seems that mentioned patch is the one
> that brought it in. Git bisect indicates it as the first bad commit. After
> realizing that I tried to partly revert changes in the patch and found that
> following change:
>
>  	/* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
> -	if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
> -		i915_gem_clflush_object(obj, true);
> -		intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
> -	}
> +	__i915_gem_object_flush_to_display(obj);
> +	intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
>
> when reverted solves the problem completely. One of the main difference is that in old version intel_fb_obj_flush was called conditionally and now it is called every time. So I surrounded the call with "if" just to check and it worked:
>
> /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
> -	if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
> -		i915_gem_clflush_object(obj, true);
> -		intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
> -	}
> +	__i915_gem_object_flush_to_display(obj);
> +	if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
> +	  intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
> + }
>
>  The above code was just a guess based on some logical insight. Although it eliminates the freezes, I don't really know if the change in calling intel_fb_obj_flush wasn't made intentionally. So could you please look at that case?
>
> If you need any help in reproducing problem conditions or further diagnostic information, please let me know.
>
>
> Thank you in advance,
>
> Yaroslav Shabalin
>
>

[-- Attachment #1.2: Type: text/html, Size: 3463 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects
  2017-09-12 22:11 [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects Yaroslav Shabalin
  2017-09-14 10:48 ` Yaroslav Shabalin
@ 2017-09-14 10:57 ` Chris Wilson
  2017-09-14 12:27   ` Yaroslav Shabalin
  1 sibling, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2017-09-14 10:57 UTC (permalink / raw)
  To: Yaroslav Shabalin, intel-gfx

Quoting Yaroslav Shabalin (2017-09-12 23:11:03)
> Hi!
> 
> I would like to report a problem which I believe is connected to changes
> introduced in this patch (https://lists.freedesktop.org/archives/intel-gfx/
> 2017-February/120777.html). Specifically the problem is: screen freezes for 0.5
> - 5 seconds (mostly for 2-3 seconds) when PSR is enabled (i915.enable_psr=1
> kernel boot option). Freezes are more frequent with rich, dynamic screen
> content. The most reliable way to reproduce it, I discovered so far, is to
> start Gnome "Disk Usage Analyzer", analyze some catalog and move mouse pointer
> over color rectangles for a while. Usually that way I get 3-5 freezes in a
> minute. During normal PC usage freezes are happening less frequently, but still
> annoyingly. For the first time this problem appeared after update to kernel
> 4.12 and still there in recent 4.13 too.
> I've looked into it a little and seems that mentioned patch is the one that
> brought it in. Git bisect indicates it as the first bad commit. After realizing
> that I tried to partly revert changes in the patch and found that following
> change:
> 
>         /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
> -       if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
> -               i915_gem_clflush_object(obj, true);
> -               intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
> -       }
> +       __i915_gem_object_flush_to_display(obj);
> +       intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
> 
> when reverted solves the problem completely. One of the main difference is that in old version intel_fb_obj_flush was called conditionally and now it is called every time. So I surrounded the call with "if" just to check and it worked:

Honestly, that doesn't seem like causation for the PSR failure.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects
  2017-09-14 10:57 ` Chris Wilson
@ 2017-09-14 12:27   ` Yaroslav Shabalin
  0 siblings, 0 replies; 17+ messages in thread
From: Yaroslav Shabalin @ 2017-09-14 12:27 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 3187 bytes --]

On Thu, Sep 14, 2017 at 1:57 PM, Chris Wilson <chris@chris-wilson.co.uk>
wrote:

> Quoting Yaroslav Shabalin (2017-09-12 23:11:03)
> > Hi!
> >
> > I would like to report a problem which I believe is connected to changes
> > introduced in this patch (https://lists.freedesktop.
> org/archives/intel-gfx/
> > 2017-February/120777.html). Specifically the problem is: screen freezes
> for 0.5
> > - 5 seconds (mostly for 2-3 seconds) when PSR is enabled
> (i915.enable_psr=1
> > kernel boot option). Freezes are more frequent with rich, dynamic screen
> > content. The most reliable way to reproduce it, I discovered so far, is
> to
> > start Gnome "Disk Usage Analyzer", analyze some catalog and move mouse
> pointer
> > over color rectangles for a while. Usually that way I get 3-5 freezes in
> a
> > minute. During normal PC usage freezes are happening less frequently,
> but still
> > annoyingly. For the first time this problem appeared after update to
> kernel
> > 4.12 and still there in recent 4.13 too.
> > I've looked into it a little and seems that mentioned patch is the one
> that
> > brought it in. Git bisect indicates it as the first bad commit. After
> realizing
> > that I tried to partly revert changes in the patch and found that
> following
> > change:
> >
> >         /* Treat this as an end-of-frame, like
> intel_user_framebuffer_dirty() */
> > -       if (obj->cache_dirty || obj->base.write_domain ==
> I915_GEM_DOMAIN_CPU) {
> > -               i915_gem_clflush_object(obj, true);
> > -               intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
> > -       }
> > +       __i915_gem_object_flush_to_display(obj);
> > +       intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
> >
> > when reverted solves the problem completely. One of the main difference
> is that in old version intel_fb_obj_flush was called conditionally and now
> it is called every time. So I surrounded the call with "if" just to check
> and it worked:
>
> Honestly, that doesn't seem like causation for the PSR failure.
> -Chriss
>

I see your doubts about connection between this two facts, but still that
only change makes PSR unusable on my laptop. I am pretty comfortable with
applying the patch every time I compile kernel to get it working. Just
wanted to point out potential problem for the benefit of other users.
Actually it took me some time to git bisect and find the root reason. I
wouldn't bother you if wasn't sure about it, please take it seriously. And
despite that there is no visible connection between PSR and that commit I
presume there may exist some kind of side effects.
Actually I can't give any technical argument since I don't know a thing
about how the driver works, it's internal structure, etc. But there are
facts that I know and I try to find some reasoning based on logic and
common sense. As a result there are questions, that I guess may get us
closer to understanding it. Was moving intel_fb_obj_flush call out of
conditional check an intentional change? Is it really needed to call
intel_fb_obj_flush every time even if cache is not dirty or write_domain is
not equal to that specific value? I would really appreciate if you take
some time to answer it.

[-- Attachment #1.2: Type: text/html, Size: 3992 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2017-09-14 12:27 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-02-22  9:41 [PATCH v3 1/6] drm/i915: Remove change_domain tracepoint Chris Wilson
2017-02-22  9:41 ` [PATCH v3 2/6] drm/i915: Move cpu_cache_is_coherent() to header Chris Wilson
2017-02-22 11:02   ` Joonas Lahtinen
2017-02-22  9:41 ` [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects Chris Wilson
2017-02-22  9:54   ` Chris Wilson
2017-02-22 10:57   ` Joonas Lahtinen
2017-02-22  9:41 ` [PATCH v3 4/6] drm/i915: Skip clflushes for all non-page backed objects Chris Wilson
2017-02-22  9:41 ` [PATCH v3 5/6] drm/i915: Perform object clflushing asynchronously Chris Wilson
2017-02-22 11:01   ` Joonas Lahtinen
2017-02-22  9:41 ` [PATCH v3 6/6] drm/i915: Remove 'retire' parameter from intel_fb_obj_flush Chris Wilson
2017-02-22 11:04   ` Joonas Lahtinen
2017-02-22 10:22 ` ✓ Fi.CI.BAT: success for series starting with [v3,1/6] drm/i915: Remove change_domain tracepoint Patchwork
2017-02-22 10:30 ` [PATCH v3 1/6] " Joonas Lahtinen
  -- strict thread matches above, loose matches on Subject: below --
2017-09-12 22:11 [PATCH v3 3/6] drm/i915: Amalgamate flushing of display objects Yaroslav Shabalin
2017-09-14 10:48 ` Yaroslav Shabalin
2017-09-14 10:57 ` Chris Wilson
2017-09-14 12:27   ` Yaroslav Shabalin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).