[PATCH 1/3] drm/i915: Only bind each object rather than for every execbuffer

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 1/3] drm/i915: Only bind each object rather than for every execbuffer
@ 2014-01-01 14:00 Chris Wilson
  2014-01-01 14:00 ` [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt Chris Wilson
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Chris Wilson @ 2014-01-01 14:00 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Ben Widawsky

One side-effect of the introduction of ppgtt was that we needed to
rebind the object into the appropriate vm (and global gtt in some
peculiar cases). For simplicity this was done twice for every object on
every call to execbuffer. However, that adds a tremendous amount of CPU
overhead (rewriting all the PTE for all objects into WC memory) per
draw. The fix is to push all the decision about which vm to bind into
and when down into the low-level bind routines through hints rather than
inside the execbuffer routine.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=72906
Tested-by: jianx.zhou@intel.com
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.h            |  25 +++---
 drivers/gpu/drm/i915/i915_gem.c            | 119 +++++++++++------------------
 drivers/gpu/drm/i915/i915_gem_context.c    |   9 +--
 drivers/gpu/drm/i915/i915_gem_evict.c      |  12 +--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  18 +++--
 drivers/gpu/drm/i915/i915_gem_gtt.c        |   2 +-
 drivers/gpu/drm/i915/i915_trace.h          |  20 ++---
 drivers/gpu/drm/i915/intel_overlay.c       |   2 +-
 drivers/gpu/drm/i915/intel_pm.c            |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  19 +++--
 10 files changed, 103 insertions(+), 125 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cc8afff878b1..65379474f312 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2033,11 +2033,11 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
 int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
 				     struct i915_address_space *vm,
 				     uint32_t alignment,
-				     bool map_and_fenceable,
-				     bool nonblocking);
-void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj);
+				     unsigned flags);
+#define PIN_MAPPABLE 0x1
+#define PIN_GLOBAL 0x2
+#define PIN_NONBLOCK 0x4
 int __must_check i915_vma_unbind(struct i915_vma *vma);
-int __must_check i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj);
 int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
 void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
 void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
@@ -2237,13 +2237,19 @@ i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj)
 static inline int __must_check
 i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
 		      uint32_t alignment,
-		      bool map_and_fenceable,
-		      bool nonblocking)
+		      unsigned flags)
 {
-	return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment,
-				   map_and_fenceable, nonblocking);
+	return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment, flags | PIN_GLOBAL);
 }
 
+static inline int
+i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj)
+{
+	return i915_vma_unbind(i915_gem_obj_to_ggtt(obj));
+}
+
+void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj);
+
 /* i915_gem_context.c */
 #define ctx_to_ppgtt(ctx) container_of((ctx)->vm, struct i915_hw_ppgtt, base)
 int __must_check i915_gem_context_init(struct drm_device *dev);
@@ -2280,8 +2286,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
 					  int min_size,
 					  unsigned alignment,
 					  unsigned cache_level,
-					  bool mappable,
-					  bool nonblock);
+					  unsigned flags);
 int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
 int i915_gem_evict_everything(struct drm_device *dev);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 656406deada0..d9acfa78d1a5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -43,12 +43,6 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *o
 static __must_check int
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 			       bool readonly);
-static __must_check int
-i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
-			   struct i915_address_space *vm,
-			   unsigned alignment,
-			   bool map_and_fenceable,
-			   bool nonblocking);
 static int i915_gem_phys_pwrite(struct drm_device *dev,
 				struct drm_i915_gem_object *obj,
 				struct drm_i915_gem_pwrite *args,
@@ -605,7 +599,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
 	char __user *user_data;
 	int page_offset, page_length, ret;
 
-	ret = i915_gem_obj_ggtt_pin(obj, 0, true, true);
+	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
 	if (ret)
 		goto out;
 
@@ -1399,7 +1393,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 
 	/* Now bind it into the GTT if needed */
-	ret = i915_gem_obj_ggtt_pin(obj,  0, true, false);
+	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
 	if (ret)
 		goto unlock;
 
@@ -2767,7 +2761,6 @@ int i915_vma_unbind(struct i915_vma *vma)
 
 	if (!drm_mm_node_allocated(&vma->node)) {
 		i915_gem_vma_destroy(vma);
-
 		return 0;
 	}
 
@@ -2819,26 +2812,6 @@ int i915_vma_unbind(struct i915_vma *vma)
 	return 0;
 }
 
-/**
- * Unbinds an object from the global GTT aperture.
- */
-int
-i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-	struct i915_address_space *ggtt = &dev_priv->gtt.base;
-
-	if (!i915_gem_obj_ggtt_bound(obj))
-		return 0;
-
-	if (i915_gem_obj_to_ggtt(obj)->pin_count)
-		return -EBUSY;
-
-	BUG_ON(obj->pages == NULL);
-
-	return i915_vma_unbind(i915_gem_obj_to_vma(obj, ggtt));
-}
-
 int i915_gpu_idle(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
@@ -3256,18 +3229,17 @@ static void i915_gem_verify_gtt(struct drm_device *dev)
 /**
  * Finds free space in the GTT aperture and binds the object there.
  */
-static int
+static struct i915_vma *
 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 			   struct i915_address_space *vm,
 			   unsigned alignment,
-			   bool map_and_fenceable,
-			   bool nonblocking)
+			   unsigned flags)
 {
 	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	u32 size, fence_size, fence_alignment, unfenced_alignment;
 	size_t gtt_max =
-		map_and_fenceable ? dev_priv->gtt.mappable_end : vm->total;
+		flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
 	struct i915_vma *vma;
 	int ret;
 
@@ -3279,18 +3251,18 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 						     obj->tiling_mode, true);
 	unfenced_alignment =
 		i915_gem_get_gtt_alignment(dev,
-						    obj->base.size,
-						    obj->tiling_mode, false);
+					   obj->base.size,
+					   obj->tiling_mode, false);
 
 	if (alignment == 0)
-		alignment = map_and_fenceable ? fence_alignment :
+		alignment = flags & PIN_MAPPABLE ? fence_alignment :
 						unfenced_alignment;
-	if (map_and_fenceable && alignment & (fence_alignment - 1)) {
+	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
 		DRM_ERROR("Invalid object alignment requested %u\n", alignment);
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 	}
 
-	size = map_and_fenceable ? fence_size : obj->base.size;
+	size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
 
 	/* If the object is bigger than the entire aperture, reject it early
 	 * before evicting everything in a vain attempt to find space.
@@ -3298,22 +3270,20 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 	if (obj->base.size > gtt_max) {
 		DRM_ERROR("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%zu\n",
 			  obj->base.size,
-			  map_and_fenceable ? "mappable" : "total",
+			  flags & PIN_MAPPABLE ? "mappable" : "total",
 			  gtt_max);
-		return -E2BIG;
+		return ERR_PTR(-E2BIG);
 	}
 
 	ret = i915_gem_object_get_pages(obj);
 	if (ret)
-		return ret;
+		return ERR_PTR(ret);
 
 	i915_gem_object_pin_pages(obj);
 
 	vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
-	if (IS_ERR(vma)) {
-		ret = PTR_ERR(vma);
+	if (IS_ERR(vma))
 		goto err_unpin;
-	}
 
 search_free:
 	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
@@ -3322,9 +3292,7 @@ search_free:
 						  DRM_MM_SEARCH_DEFAULT);
 	if (ret) {
 		ret = i915_gem_evict_something(dev, vm, size, alignment,
-					       obj->cache_level,
-					       map_and_fenceable,
-					       nonblocking);
+					       obj->cache_level, flags);
 		if (ret == 0)
 			goto search_free;
 
@@ -3355,19 +3323,23 @@ search_free:
 		obj->map_and_fenceable = mappable && fenceable;
 	}
 
-	WARN_ON(map_and_fenceable && !obj->map_and_fenceable);
+	WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
+
+	trace_i915_vma_bind(vma, flags);
+	vma->bind_vma(vma, obj->cache_level,
+		      flags & (PIN_MAPPABLE | PIN_GLOBAL) ? GLOBAL_BIND : 0);
 
-	trace_i915_vma_bind(vma, map_and_fenceable);
 	i915_gem_verify_gtt(dev);
-	return 0;
+	return vma;
 
 err_remove_node:
 	drm_mm_remove_node(&vma->node);
 err_free_vma:
 	i915_gem_vma_destroy(vma);
+	vma = ERR_PTR(ret);
 err_unpin:
 	i915_gem_object_unpin_pages(obj);
-	return ret;
+	return vma;
 }
 
 bool
@@ -3559,7 +3531,9 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 		}
 
 		list_for_each_entry(vma, &obj->vma_list, vma_link)
-			vma->bind_vma(vma, cache_level, 0);
+			if (drm_mm_node_allocated(&vma->node))
+				vma->bind_vma(vma, cache_level,
+					      obj->has_global_gtt_mapping ? GLOBAL_BIND : 0);
 	}
 
 	list_for_each_entry(vma, &obj->vma_list, vma_link)
@@ -3728,7 +3702,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	 * (e.g. libkms for the bootup splash), we have to ensure that we
 	 * always use map_and_fenceable for all scanout buffers.
 	 */
-	ret = i915_gem_obj_ggtt_pin(obj, alignment, true, false);
+	ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE);
 	if (ret)
 		goto err_unpin_display;
 
@@ -3884,52 +3858,49 @@ int
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
 		    struct i915_address_space *vm,
 		    uint32_t alignment,
-		    bool map_and_fenceable,
-		    bool nonblocking)
+		    unsigned flags)
 {
-	const u32 flags = map_and_fenceable ? GLOBAL_BIND : 0;
 	struct i915_vma *vma;
 	int ret;
 
-	WARN_ON(map_and_fenceable && !i915_is_ggtt(vm));
+	if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
+		return -EINVAL;
 
 	vma = i915_gem_obj_to_vma(obj, vm);
-
 	if (vma) {
 		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
 			return -EBUSY;
 
 		if ((alignment &&
 		     vma->node.start & (alignment - 1)) ||
-		    (map_and_fenceable && !obj->map_and_fenceable)) {
+		    (flags & PIN_MAPPABLE && !obj->map_and_fenceable)) {
 			WARN(vma->pin_count,
 			     "bo is already pinned with incorrect alignment:"
 			     " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d,"
 			     " obj->map_and_fenceable=%d\n",
 			     i915_gem_obj_offset(obj, vm), alignment,
-			     map_and_fenceable,
+			     flags & PIN_MAPPABLE,
 			     obj->map_and_fenceable);
 			ret = i915_vma_unbind(vma);
 			if (ret)
 				return ret;
+
+			vma = NULL;
 		}
 	}
 
-	if (!i915_gem_obj_bound(obj, vm)) {
-		ret = i915_gem_object_bind_to_vm(obj, vm, alignment,
-						 map_and_fenceable,
-						 nonblocking);
-		if (ret)
-			return ret;
-
+	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
+		vma = i915_gem_object_bind_to_vm(obj, vm, alignment, flags);
+		if (IS_ERR(vma))
+			return PTR_ERR(vma);
 	}
 
-	vma = i915_gem_obj_to_vma(obj, vm);
-
-	vma->bind_vma(vma, obj->cache_level, flags);
+	if (flags & PIN_GLOBAL && !obj->has_global_gtt_mapping)
+		vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
 
-	i915_gem_obj_to_vma(obj, vm)->pin_count++;
-	obj->pin_mappable |= map_and_fenceable;
+	vma->pin_count++;
+	if (flags & PIN_MAPPABLE)
+		obj->pin_mappable |= true;
 
 	return 0;
 }
@@ -3987,7 +3958,7 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
 	}
 
 	if (obj->user_pin_count == 0) {
-		ret = i915_gem_obj_ggtt_pin(obj, args->alignment, true, false);
+		ret = i915_gem_obj_ggtt_pin(obj, args->alignment, PIN_MAPPABLE);
 		if (ret)
 			goto out;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index ebe0f67eac08..b428b101f9d0 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -282,8 +282,7 @@ i915_gem_create_context(struct drm_device *dev,
 			 * context.
 			 */
 			ret = i915_gem_obj_ggtt_pin(ctx->obj,
-						    get_context_alignment(dev),
-						    false, false);
+						    get_context_alignment(dev), 0);
 			if (ret) {
 				DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret);
 				goto err_destroy;
@@ -336,8 +335,7 @@ void i915_gem_context_reset(struct drm_device *dev)
 
 		if (i == RCS) {
 			WARN_ON(i915_gem_obj_ggtt_pin(dctx->obj,
-						      get_context_alignment(dev),
-						      false, false));
+						      get_context_alignment(dev), 0));
 			/* Fake a finish/inactive */
 			dctx->obj->base.write_domain = 0;
 			dctx->obj->active = 0;
@@ -600,8 +598,7 @@ static int do_switch(struct intel_ring_buffer *ring,
 	/* Trying to pin first makes error handling easier. */
 	if (ring == &dev_priv->ring[RCS]) {
 		ret = i915_gem_obj_ggtt_pin(to->obj,
-					    get_context_alignment(ring->dev),
-					    false, false);
+					    get_context_alignment(ring->dev), 0);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 54413ed46a46..fed72a67afd2 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -48,14 +48,14 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
 int
 i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
 			 int min_size, unsigned alignment, unsigned cache_level,
-			 bool mappable, bool nonblocking)
+			 unsigned flags)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct list_head eviction_list, unwind_list;
 	struct i915_vma *vma;
 	int ret = 0;
 
-	trace_i915_gem_evict(dev, min_size, alignment, mappable);
+	trace_i915_gem_evict(dev, min_size, alignment, flags);
 
 	/*
 	 * The goal is to evict objects and amalgamate space in LRU order.
@@ -81,7 +81,7 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
 	 */
 
 	INIT_LIST_HEAD(&unwind_list);
-	if (mappable) {
+	if (flags & PIN_MAPPABLE) {
 		BUG_ON(!i915_is_ggtt(vm));
 		drm_mm_init_scan_with_range(&vm->mm, min_size,
 					    alignment, cache_level, 0,
@@ -96,7 +96,7 @@ search_again:
 			goto found;
 	}
 
-	if (nonblocking)
+	if (flags & PIN_NONBLOCK)
 		goto none;
 
 	/* Now merge in the soon-to-be-expired objects... */
@@ -120,13 +120,13 @@ none:
 	/* Can we unpin some objects such as idle hw contents,
 	 * or pending flips?
 	 */
-	ret = nonblocking ? -ENOSPC : i915_gpu_idle(dev);
+	ret = flags & PIN_NONBLOCK ? -ENOSPC : i915_gpu_idle(dev);
 	if (ret)
 		return ret;
 
 	/* Only idle the GPU and repeat the search once */
 	i915_gem_retire_requests(dev);
-	nonblocking = true;
+	flags |= PIN_NONBLOCK;
 	goto search_again;
 
 found:
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index bbff8f9b6549..cd09d42f507b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -544,19 +544,23 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 	struct drm_i915_gem_object *obj = vma->obj;
 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
-	bool need_fence, need_mappable;
-	u32 flags = (entry->flags & EXEC_OBJECT_NEEDS_GTT) &&
-		!vma->obj->has_global_gtt_mapping ? GLOBAL_BIND : 0;
+	bool need_fence;
+	unsigned flags;
 	int ret;
 
+	flags = 0;
+
 	need_fence =
 		has_fenced_gpu_access &&
 		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 		obj->tiling_mode != I915_TILING_NONE;
-	need_mappable = need_fence || need_reloc_mappable(vma);
+	if (need_fence || need_reloc_mappable(vma))
+		flags |= PIN_MAPPABLE;
 
-	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, need_mappable,
-				  false);
+	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
+		flags |= PIN_GLOBAL;
+
+	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
 	if (ret)
 		return ret;
 
@@ -585,8 +589,6 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
 	}
 
-	vma->bind_vma(vma, obj->cache_level, flags);
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 998f9a0b322a..d113eb5e2f5b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -865,7 +865,7 @@ alloc:
 	if (ret == -ENOSPC && !retried) {
 		ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
 					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
-					       I915_CACHE_NONE, false, true);
+					       I915_CACHE_NONE, 0);
 		if (ret)
 			return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 6e580c98dede..b95a380958db 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -34,15 +34,15 @@ TRACE_EVENT(i915_gem_object_create,
 );
 
 TRACE_EVENT(i915_vma_bind,
-	    TP_PROTO(struct i915_vma *vma, bool mappable),
-	    TP_ARGS(vma, mappable),
+	    TP_PROTO(struct i915_vma *vma, unsigned flags),
+	    TP_ARGS(vma, flags),
 
 	    TP_STRUCT__entry(
 			     __field(struct drm_i915_gem_object *, obj)
 			     __field(struct i915_address_space *, vm)
 			     __field(u32, offset)
 			     __field(u32, size)
-			     __field(bool, mappable)
+			     __field(unsigned, flags)
 			     ),
 
 	    TP_fast_assign(
@@ -50,12 +50,12 @@ TRACE_EVENT(i915_vma_bind,
 			   __entry->vm = vma->vm;
 			   __entry->offset = vma->node.start;
 			   __entry->size = vma->node.size;
-			   __entry->mappable = mappable;
+			   __entry->flags = flags;
 			   ),
 
 	    TP_printk("obj=%p, offset=%08x size=%x%s vm=%p",
 		      __entry->obj, __entry->offset, __entry->size,
-		      __entry->mappable ? ", mappable" : "",
+		      __entry->flags & PIN_MAPPABLE ? ", mappable" : "",
 		      __entry->vm)
 );
 
@@ -196,26 +196,26 @@ DEFINE_EVENT(i915_gem_object, i915_gem_object_destroy,
 );
 
 TRACE_EVENT(i915_gem_evict,
-	    TP_PROTO(struct drm_device *dev, u32 size, u32 align, bool mappable),
-	    TP_ARGS(dev, size, align, mappable),
+	    TP_PROTO(struct drm_device *dev, u32 size, u32 align, unsigned flags),
+	    TP_ARGS(dev, size, align, flags),
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
 			     __field(u32, size)
 			     __field(u32, align)
-			     __field(bool, mappable)
+			     __field(unsigned, flags)
 			    ),
 
 	    TP_fast_assign(
 			   __entry->dev = dev->primary->index;
 			   __entry->size = size;
 			   __entry->align = align;
-			   __entry->mappable = mappable;
+			   __entry->flags = flags;
 			  ),
 
 	    TP_printk("dev=%d, size=%d, align=%d %s",
 		      __entry->dev, __entry->size, __entry->align,
-		      __entry->mappable ? ", mappable" : "")
+		      __entry->flags & PIN_MAPPABLE ? ", mappable" : "")
 );
 
 TRACE_EVENT(i915_gem_evict_everything,
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index a1397b1646af..1d46608bbd8b 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -1349,7 +1349,7 @@ void intel_setup_overlay(struct drm_device *dev)
 		}
 		overlay->flip_addr = reg_bo->phys_obj->handle->busaddr;
 	} else {
-		ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, true, false);
+		ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, PIN_MAPPABLE);
 		if (ret) {
 			DRM_ERROR("failed to pin overlay register bo\n");
 			goto out_free_bo;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 469170c11bb4..e29804eda113 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2753,7 +2753,7 @@ intel_alloc_context_page(struct drm_device *dev)
 		return NULL;
 	}
 
-	ret = i915_gem_obj_ggtt_pin(ctx, 4096, true, false);
+	ret = i915_gem_obj_ggtt_pin(ctx, 4096, 0);
 	if (ret) {
 		DRM_ERROR("failed to pin power context: %d\n", ret);
 		goto err_unref;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 442c9a66ebab..5744841669e4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -531,9 +531,11 @@ init_pipe_control(struct intel_ring_buffer *ring)
 		goto err;
 	}
 
-	i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
+	ret = i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
+	if (ret)
+		goto err_unref;
 
-	ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, true, false);
+	ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0);
 	if (ret)
 		goto err_unref;
 
@@ -1269,12 +1271,13 @@ static int init_status_page(struct intel_ring_buffer *ring)
 		goto err;
 	}
 
-	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
+	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
+	if (ret)
+		goto err_unref;
 
-	ret = i915_gem_obj_ggtt_pin(obj, 4096, true, false);
-	if (ret != 0) {
+	ret = i915_gem_obj_ggtt_pin(obj, 4096, 0);
+	if (ret)
 		goto err_unref;
-	}
 
 	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
 	ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
@@ -1354,7 +1357,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 
 	ring->obj = obj;
 
-	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, true, false);
+	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
 	if (ret)
 		goto err_unref;
 
@@ -1927,7 +1930,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 			return -ENOMEM;
 		}
 
-		ret = i915_gem_obj_ggtt_pin(obj, 0, true, false);
+		ret = i915_gem_obj_ggtt_pin(obj, 0, 0);
 		if (ret != 0) {
 			drm_gem_object_unreference(&obj->base);
 			DRM_ERROR("Failed to ping batch bo\n");
-- 
1.8.5.2

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt
  2014-01-01 14:00 [PATCH 1/3] drm/i915: Only bind each object rather than for every execbuffer Chris Wilson
@ 2014-01-01 14:00 ` Chris Wilson
  2014-01-01 14:00 ` [PATCH 3/3] drm/i915: Remove redundant list_empty(eb->vmas) tests in execbuffer Chris Wilson
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 8+ messages in thread
From: Chris Wilson @ 2014-01-01 14:00 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Ben Widawsky

With ppgtt, it is no longer correct to mark an object as
map_and_fenceable if we simply unbind it from the global gtt. This has
consequences during execbuffer where we simply use
obj->map_and_fenceable in use_cpu_reloc() to decide which access method
to use for writing into the object. Now for a ppgtt object,
map_and_fenceable will be true when it is not bound into the ggtt but
only in a ppgtt, leading to an invalid access on a non-llc architecture.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c            |  5 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 92 ++++++++++++++----------------
 drivers/gpu/drm/i915/i915_gem_tiling.c     |  7 +--
 3 files changed, 47 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d9acfa78d1a5..344bb47adaeb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2791,9 +2791,8 @@ int i915_vma_unbind(struct i915_vma *vma)
 	i915_gem_gtt_finish_object(obj);
 
 	list_del(&vma->mm_list);
-	/* Avoid an unnecessary call to unbind on rebind. */
 	if (i915_is_ggtt(vma->vm))
-		obj->map_and_fenceable = true;
+		obj->map_and_fenceable = false;
 
 	drm_mm_remove_node(&vma->node);
 	i915_gem_vma_destroy(vma);
@@ -4114,8 +4113,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 
 	obj->fence_reg = I915_FENCE_REG_NONE;
 	obj->madv = I915_MADV_WILLNEED;
-	/* Avoid an unnecessary call to unbind on the first bind. */
-	obj->map_and_fenceable = true;
 
 	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index cd09d42f507b..26b57f1135c9 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -35,6 +35,7 @@
 
 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
+#define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
 
 struct eb_vmas {
 	struct list_head vmas;
@@ -529,14 +530,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
 }
 
 static int
-need_reloc_mappable(struct i915_vma *vma)
-{
-	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
-		i915_is_ggtt(vma->vm);
-}
-
-static int
 i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 				struct intel_ring_buffer *ring,
 				bool *need_reloc)
@@ -544,19 +537,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 	struct drm_i915_gem_object *obj = vma->obj;
 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
-	bool need_fence;
 	unsigned flags;
 	int ret;
 
 	flags = 0;
-
-	need_fence =
-		has_fenced_gpu_access &&
-		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-		obj->tiling_mode != I915_TILING_NONE;
-	if (need_fence || need_reloc_mappable(vma))
+	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
 		flags |= PIN_MAPPABLE;
-
 	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
 		flags |= PIN_GLOBAL;
 
@@ -592,6 +578,27 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 	return 0;
 }
 
+static bool
+need_reloc_mappable(struct i915_vma *vma)
+{
+	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+
+	if (entry->relocation_count == 0)
+		return false;
+
+	if (!i915_is_ggtt(vma->vm))
+		return false;
+
+	/* See also use_cpu_reloc() */
+	if (HAS_LLC(vma->obj->base.dev))
+		return false;
+
+	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+		return false;
+
+	return true;
+}
+
 static int
 i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    struct list_head *vmas,
@@ -624,9 +631,10 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			obj->tiling_mode != I915_TILING_NONE;
 		need_mappable = need_fence || need_reloc_mappable(vma);
 
-		if (need_mappable)
+		if (need_mappable) {
+			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
 			list_move(&vma->exec_list, &ordered_vmas);
-		else
+		} else
 			list_move_tail(&vma->exec_list, &ordered_vmas);
 
 		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
@@ -654,25 +662,14 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 		/* Unbind any ill-fitting objects or pin. */
 		list_for_each_entry(vma, vmas, exec_list) {
 			struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-			bool need_fence, need_mappable;
 
 			obj = vma->obj;
 
 			if (!drm_mm_node_allocated(&vma->node))
 				continue;
 
-			need_fence =
-				has_fenced_gpu_access &&
-				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-				obj->tiling_mode != I915_TILING_NONE;
-			need_mappable = need_fence || need_reloc_mappable(vma);
-
-			WARN_ON((need_mappable || need_fence) &&
-			       !i915_is_ggtt(vma->vm));
-
-			if ((entry->alignment &&
-			     vma->node.start & (entry->alignment - 1)) ||
-			    (need_mappable && !obj->map_and_fenceable))
+			if ((entry->alignment && vma->node.start & (entry->alignment - 1)) ||
+			    (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable))
 				ret = i915_vma_unbind(vma);
 			else
 				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
@@ -1185,33 +1182,28 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
 	 * hsw should have this fixed, but bdw mucks it up again. */
-	if (flags & I915_DISPATCH_SECURE &&
-	    !batch_obj->has_global_gtt_mapping) {
-		/* When we have multiple VMs, we'll need to make sure that we
-		 * allocate space first */
-		struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
-		BUG_ON(!vma);
-		vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
-	}
+	if (flags & I915_DISPATCH_SECURE) {
+		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+		if (ret)
+			goto err;
 
-	if (flags & I915_DISPATCH_SECURE)
 		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
-	else
+	} else
 		exec_start += i915_gem_obj_offset(batch_obj, vm);
 
 	ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas);
 	if (ret)
-		goto err;
+		goto err_batch;
 
 	ret = i915_switch_context(ring, file, ctx);
 	if (ret)
-		goto err;
+		goto err_batch;
 
 	if (ring == &dev_priv->ring[RCS] &&
 	    mode != dev_priv->relative_constants_mode) {
 		ret = intel_ring_begin(ring, 4);
 		if (ret)
-				goto err;
+			goto err_batch;
 
 		intel_ring_emit(ring, MI_NOOP);
 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
@@ -1225,30 +1217,29 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
 		ret = i915_reset_gen7_sol_offsets(dev, ring);
 		if (ret)
-			goto err;
+			goto err_batch;
 	}
 
-
 	exec_len = args->batch_len;
 	if (cliprects) {
 		for (i = 0; i < args->num_cliprects; i++) {
 			ret = i915_emit_box(dev, &cliprects[i],
 					    args->DR1, args->DR4);
 			if (ret)
-				goto err;
+				goto err_batch;
 
 			ret = ring->dispatch_execbuffer(ring,
 							exec_start, exec_len,
 							flags);
 			if (ret)
-				goto err;
+				goto err_batch;
 		}
 	} else {
 		ret = ring->dispatch_execbuffer(ring,
 						exec_start, exec_len,
 						flags);
 		if (ret)
-			goto err;
+			goto err_batch;
 	}
 
 	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
@@ -1256,6 +1247,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
 	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
 
+err_batch:
+	if (flags & I915_DISPATCH_SECURE)
+		i915_gem_object_ggtt_unpin(batch_obj);
 err:
 	/* the request owns the ref now */
 	i915_gem_context_unreference(ctx);
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index eb993584aa6b..e49c662ca9c6 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -359,10 +359,9 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 		 */
 
 		obj->map_and_fenceable =
-			!i915_gem_obj_ggtt_bound(obj) ||
-			(i915_gem_obj_ggtt_offset(obj) +
-			 obj->base.size <= dev_priv->gtt.mappable_end &&
-			 i915_gem_object_fence_ok(obj, args->tiling_mode));
+			i915_gem_obj_ggtt_bound(obj) &&
+			i915_gem_obj_ggtt_offset(obj) + obj->base.size <= dev_priv->gtt.mappable_end &&
+			i915_gem_object_fence_ok(obj, args->tiling_mode);
 
 		/* Rebind if we need a change of alignment */
 		if (!obj->map_and_fenceable) {
-- 
1.8.5.2

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/3] drm/i915: Remove redundant list_empty(eb->vmas) tests in execbuffer
  2014-01-01 14:00 [PATCH 1/3] drm/i915: Only bind each object rather than for every execbuffer Chris Wilson
  2014-01-01 14:00 ` [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt Chris Wilson
@ 2014-01-01 14:00 ` Chris Wilson
  2014-01-01 21:51 ` [PATCH 1/3] drm/i915: Only bind each object rather than for every execbuffer Ben Widawsky
  2014-01-07  7:43 ` Daniel Vetter
  3 siblings, 0 replies; 8+ messages in thread
From: Chris Wilson @ 2014-01-01 14:00 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Ben Widawsky

Part of the pre-validation for an execbuffer call is that there is at
least one object in the execlist. As we bail if we fail to lookup any
object, we can be sure that after the eb_lookup_vma() there is at least
one object in the vma list and so we do not need to assert.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 26b57f1135c9..0396f943d7a1 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -611,9 +611,6 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 	int retry;
 
-	if (list_empty(vmas))
-		return 0;
-
 	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
 
 	INIT_LIST_HEAD(&ordered_vmas);
@@ -717,9 +714,6 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 	int i, total, ret;
 	unsigned count = args->buffer_count;
 
-	if (WARN_ON(list_empty(&eb->vmas)))
-		return 0;
-
 	vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;
 
 	/* We may process another execbuffer during the unlock... */
-- 
1.8.5.2

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/3] drm/i915: Only bind each object rather than for every execbuffer
  2014-01-01 14:00 [PATCH 1/3] drm/i915: Only bind each object rather than for every execbuffer Chris Wilson
  2014-01-01 14:00 ` [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt Chris Wilson
  2014-01-01 14:00 ` [PATCH 3/3] drm/i915: Remove redundant list_empty(eb->vmas) tests in execbuffer Chris Wilson
@ 2014-01-01 21:51 ` Ben Widawsky
  2014-01-02 10:37   ` Chris Wilson
  2014-01-07  7:43 ` Daniel Vetter
  3 siblings, 1 reply; 8+ messages in thread
From: Ben Widawsky @ 2014-01-01 21:51 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Daniel Vetter, intel-gfx

On Wed, Jan 01, 2014 at 02:00:54PM +0000, Chris Wilson wrote:
> One side-effect of the introduction of ppgtt was that we needed to
> rebind the object into the appropriate vm (and global gtt in some
> peculiar cases). For simplicity this was done twice for every object on
> every call to execbuffer. However, that adds a tremendous amount of CPU
> overhead (rewriting all the PTE for all objects into WC memory) per
> draw. The fix is to push all the decision about which vm to bind into
> and when down into the low-level bind routines through hints rather than
> inside the execbuffer routine.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=72906
> Tested-by: jianx.zhou@intel.com
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>

My original plan with full PPGTT was to avoid all this deferred bind,
and other complications in the bind logic. My intention had been, if a
VMA exists, it is bound (with global GTT as the special case). It
would have therefore been relatively easy to determine when to bind an
object into a VM. One thing which I didn't plan for was having to change
the caching type. To a large extent, I feel this patch heads towards
that goal, except uses drm_mm_node_allocated where I had expected to
simply check if a VMA exists.

Anyway, no point to this really other than to explain why things are
somewhat messy.

> ---
>  drivers/gpu/drm/i915/i915_drv.h            |  25 +++---
>  drivers/gpu/drm/i915/i915_gem.c            | 119 +++++++++++------------------
>  drivers/gpu/drm/i915/i915_gem_context.c    |   9 +--
>  drivers/gpu/drm/i915/i915_gem_evict.c      |  12 +--
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  18 +++--
>  drivers/gpu/drm/i915/i915_gem_gtt.c        |   2 +-
>  drivers/gpu/drm/i915/i915_trace.h          |  20 ++---
>  drivers/gpu/drm/i915/intel_overlay.c       |   2 +-
>  drivers/gpu/drm/i915/intel_pm.c            |   2 +-
>  drivers/gpu/drm/i915/intel_ringbuffer.c    |  19 +++--
>  10 files changed, 103 insertions(+), 125 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index cc8afff878b1..65379474f312 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2033,11 +2033,11 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
>  int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  				     struct i915_address_space *vm,
>  				     uint32_t alignment,
> -				     bool map_and_fenceable,
> -				     bool nonblocking);
> -void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj);
> +				     unsigned flags);
> +#define PIN_MAPPABLE 0x1
> +#define PIN_GLOBAL 0x2
> +#define PIN_NONBLOCK 0x4
>  int __must_check i915_vma_unbind(struct i915_vma *vma);
> -int __must_check i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj);
>  int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
>  void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
>  void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
> @@ -2237,13 +2237,19 @@ i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj)
>  static inline int __must_check
>  i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
>  		      uint32_t alignment,
> -		      bool map_and_fenceable,
> -		      bool nonblocking)
> +		      unsigned flags)
>  {
> -	return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment,
> -				   map_and_fenceable, nonblocking);
> +	return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment, flags | PIN_GLOBAL);
>  }

I'm torn as to whether or not it's okay to not pass PIN_GLOBAL in flags.

>  
> +static inline int
> +i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj)
> +{
> +	return i915_vma_unbind(i915_gem_obj_to_ggtt(obj));
> +}
> +
> +void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj);
> +
>  /* i915_gem_context.c */
>  #define ctx_to_ppgtt(ctx) container_of((ctx)->vm, struct i915_hw_ppgtt, base)
>  int __must_check i915_gem_context_init(struct drm_device *dev);
> @@ -2280,8 +2286,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
>  					  int min_size,
>  					  unsigned alignment,
>  					  unsigned cache_level,
> -					  bool mappable,
> -					  bool nonblock);
> +					  unsigned flags);
>  int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
>  int i915_gem_evict_everything(struct drm_device *dev);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 656406deada0..d9acfa78d1a5 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -43,12 +43,6 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *o
>  static __must_check int
>  i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
>  			       bool readonly);
> -static __must_check int
> -i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> -			   struct i915_address_space *vm,
> -			   unsigned alignment,
> -			   bool map_and_fenceable,
> -			   bool nonblocking);
>  static int i915_gem_phys_pwrite(struct drm_device *dev,
>  				struct drm_i915_gem_object *obj,
>  				struct drm_i915_gem_pwrite *args,
> @@ -605,7 +599,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
>  	char __user *user_data;
>  	int page_offset, page_length, ret;
>  
> -	ret = i915_gem_obj_ggtt_pin(obj, 0, true, true);
> +	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
>  	if (ret)
>  		goto out;
>  
> @@ -1399,7 +1393,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  	}
>  
>  	/* Now bind it into the GTT if needed */
> -	ret = i915_gem_obj_ggtt_pin(obj,  0, true, false);
> +	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
>  	if (ret)
>  		goto unlock;
>  
> @@ -2767,7 +2761,6 @@ int i915_vma_unbind(struct i915_vma *vma)
>  
>  	if (!drm_mm_node_allocated(&vma->node)) {
>  		i915_gem_vma_destroy(vma);
> -
>  		return 0;
>  	}
>  
> @@ -2819,26 +2812,6 @@ int i915_vma_unbind(struct i915_vma *vma)
>  	return 0;
>  }
>  
> -/**
> - * Unbinds an object from the global GTT aperture.
> - */
> -int
> -i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj)
> -{
> -	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> -	struct i915_address_space *ggtt = &dev_priv->gtt.base;
> -
> -	if (!i915_gem_obj_ggtt_bound(obj))
> -		return 0;
> -
> -	if (i915_gem_obj_to_ggtt(obj)->pin_count)
> -		return -EBUSY;
> -
> -	BUG_ON(obj->pages == NULL);
> -
> -	return i915_vma_unbind(i915_gem_obj_to_vma(obj, ggtt));
> -}
> -

My gut tells me this hunk should be in a different patch.

>  int i915_gpu_idle(struct drm_device *dev)
>  {
>  	drm_i915_private_t *dev_priv = dev->dev_private;
> @@ -3256,18 +3229,17 @@ static void i915_gem_verify_gtt(struct drm_device *dev)
>  /**
>   * Finds free space in the GTT aperture and binds the object there.
>   */
> -static int
> +static struct i915_vma *
>  i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  			   struct i915_address_space *vm,
>  			   unsigned alignment,
> -			   bool map_and_fenceable,
> -			   bool nonblocking)
> +			   unsigned flags)
>  {
>  	struct drm_device *dev = obj->base.dev;
>  	drm_i915_private_t *dev_priv = dev->dev_private;
>  	u32 size, fence_size, fence_alignment, unfenced_alignment;
>  	size_t gtt_max =
> -		map_and_fenceable ? dev_priv->gtt.mappable_end : vm->total;
> +		flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
>  	struct i915_vma *vma;
>  	int ret;
>  
> @@ -3279,18 +3251,18 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  						     obj->tiling_mode, true);
>  	unfenced_alignment =
>  		i915_gem_get_gtt_alignment(dev,
> -						    obj->base.size,
> -						    obj->tiling_mode, false);
> +					   obj->base.size,
> +					   obj->tiling_mode, false);
>  
>  	if (alignment == 0)
> -		alignment = map_and_fenceable ? fence_alignment :
> +		alignment = flags & PIN_MAPPABLE ? fence_alignment :
>  						unfenced_alignment;
> -	if (map_and_fenceable && alignment & (fence_alignment - 1)) {
> +	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
>  		DRM_ERROR("Invalid object alignment requested %u\n", alignment);
> -		return -EINVAL;
> +		return ERR_PTR(-EINVAL);
>  	}
>  
> -	size = map_and_fenceable ? fence_size : obj->base.size;
> +	size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
>  
>  	/* If the object is bigger than the entire aperture, reject it early
>  	 * before evicting everything in a vain attempt to find space.
> @@ -3298,22 +3270,20 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  	if (obj->base.size > gtt_max) {
>  		DRM_ERROR("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%zu\n",
>  			  obj->base.size,
> -			  map_and_fenceable ? "mappable" : "total",
> +			  flags & PIN_MAPPABLE ? "mappable" : "total",
>  			  gtt_max);
> -		return -E2BIG;
> +		return ERR_PTR(-E2BIG);
>  	}
>  
>  	ret = i915_gem_object_get_pages(obj);
>  	if (ret)
> -		return ret;
> +		return ERR_PTR(ret);
>  
>  	i915_gem_object_pin_pages(obj);
>  
>  	vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
> -	if (IS_ERR(vma)) {
> -		ret = PTR_ERR(vma);
> +	if (IS_ERR(vma))
>  		goto err_unpin;
> -	}
>  
>  search_free:
>  	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> @@ -3322,9 +3292,7 @@ search_free:
>  						  DRM_MM_SEARCH_DEFAULT);
>  	if (ret) {
>  		ret = i915_gem_evict_something(dev, vm, size, alignment,
> -					       obj->cache_level,
> -					       map_and_fenceable,
> -					       nonblocking);
> +					       obj->cache_level, flags);
>  		if (ret == 0)
>  			goto search_free;
>  
> @@ -3355,19 +3323,23 @@ search_free:
>  		obj->map_and_fenceable = mappable && fenceable;
>  	}
>  
> -	WARN_ON(map_and_fenceable && !obj->map_and_fenceable);
> +	WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
> +
> +	trace_i915_vma_bind(vma, flags);
> +	vma->bind_vma(vma, obj->cache_level,
> +		      flags & (PIN_MAPPABLE | PIN_GLOBAL) ? GLOBAL_BIND : 0);
>  
> -	trace_i915_vma_bind(vma, map_and_fenceable);
>  	i915_gem_verify_gtt(dev);
> -	return 0;
> +	return vma;
>  
>  err_remove_node:
>  	drm_mm_remove_node(&vma->node);
>  err_free_vma:
>  	i915_gem_vma_destroy(vma);
> +	vma = ERR_PTR(ret);
>  err_unpin:
>  	i915_gem_object_unpin_pages(obj);
> -	return ret;
> +	return vma;
>  }
>  
>  bool
> @@ -3559,7 +3531,9 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>  		}
>  
>  		list_for_each_entry(vma, &obj->vma_list, vma_link)
> -			vma->bind_vma(vma, cache_level, 0);
> +			if (drm_mm_node_allocated(&vma->node))
> +				vma->bind_vma(vma, cache_level,
> +					      obj->has_global_gtt_mapping ? GLOBAL_BIND : 0);

will if (vma->node.color != cache_level) work here?

>  	}
>  
>  	list_for_each_entry(vma, &obj->vma_list, vma_link)
> @@ -3728,7 +3702,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  	 * (e.g. libkms for the bootup splash), we have to ensure that we
>  	 * always use map_and_fenceable for all scanout buffers.
>  	 */
> -	ret = i915_gem_obj_ggtt_pin(obj, alignment, true, false);
> +	ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE);
>  	if (ret)
>  		goto err_unpin_display;
>  
> @@ -3884,52 +3858,49 @@ int
>  i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  		    struct i915_address_space *vm,
>  		    uint32_t alignment,
> -		    bool map_and_fenceable,
> -		    bool nonblocking)
> +		    unsigned flags)
>  {
> -	const u32 flags = map_and_fenceable ? GLOBAL_BIND : 0;
>  	struct i915_vma *vma;
>  	int ret;
>  
> -	WARN_ON(map_and_fenceable && !i915_is_ggtt(vm));
> +	if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
> +		return -EINVAL;
>  
>  	vma = i915_gem_obj_to_vma(obj, vm);
> -
>  	if (vma) {
>  		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
>  			return -EBUSY;
>  
>  		if ((alignment &&
>  		     vma->node.start & (alignment - 1)) ||
> -		    (map_and_fenceable && !obj->map_and_fenceable)) {
> +		    (flags & PIN_MAPPABLE && !obj->map_and_fenceable)) {
>  			WARN(vma->pin_count,
>  			     "bo is already pinned with incorrect alignment:"
>  			     " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d,"
>  			     " obj->map_and_fenceable=%d\n",
>  			     i915_gem_obj_offset(obj, vm), alignment,
> -			     map_and_fenceable,
> +			     flags & PIN_MAPPABLE,
>  			     obj->map_and_fenceable);
>  			ret = i915_vma_unbind(vma);
>  			if (ret)
>  				return ret;
> +
> +			vma = NULL;
>  		}
>  	}
>  
> -	if (!i915_gem_obj_bound(obj, vm)) {
> -		ret = i915_gem_object_bind_to_vm(obj, vm, alignment,
> -						 map_and_fenceable,
> -						 nonblocking);
> -		if (ret)
> -			return ret;
> -
> +	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
> +		vma = i915_gem_object_bind_to_vm(obj, vm, alignment, flags);
> +		if (IS_ERR(vma))
> +			return PTR_ERR(vma);
>  	}
>  
> -	vma = i915_gem_obj_to_vma(obj, vm);
> -
> -	vma->bind_vma(vma, obj->cache_level, flags);
> +	if (flags & PIN_GLOBAL && !obj->has_global_gtt_mapping)
> +		vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);

For some reason I remember seeing a regression when I tried this. It
should be safe to do this without most of the other changes - now I
wonder what I had seen.

>  
> -	i915_gem_obj_to_vma(obj, vm)->pin_count++;
> -	obj->pin_mappable |= map_and_fenceable;
> +	vma->pin_count++;
> +	if (flags & PIN_MAPPABLE)
> +		obj->pin_mappable |= true;

just '= true'

>  
>  	return 0;
>  }
> @@ -3987,7 +3958,7 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
>  	}
>  
>  	if (obj->user_pin_count == 0) {
> -		ret = i915_gem_obj_ggtt_pin(obj, args->alignment, true, false);
> +		ret = i915_gem_obj_ggtt_pin(obj, args->alignment, PIN_MAPPABLE);
>  		if (ret)
>  			goto out;
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index ebe0f67eac08..b428b101f9d0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -282,8 +282,7 @@ i915_gem_create_context(struct drm_device *dev,
>  			 * context.
>  			 */
>  			ret = i915_gem_obj_ggtt_pin(ctx->obj,
> -						    get_context_alignment(dev),
> -						    false, false);
> +						    get_context_alignment(dev), 0);
>  			if (ret) {
>  				DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret);
>  				goto err_destroy;
> @@ -336,8 +335,7 @@ void i915_gem_context_reset(struct drm_device *dev)
>  
>  		if (i == RCS) {
>  			WARN_ON(i915_gem_obj_ggtt_pin(dctx->obj,
> -						      get_context_alignment(dev),
> -						      false, false));
> +						      get_context_alignment(dev), 0));
>  			/* Fake a finish/inactive */
>  			dctx->obj->base.write_domain = 0;
>  			dctx->obj->active = 0;
> @@ -600,8 +598,7 @@ static int do_switch(struct intel_ring_buffer *ring,
>  	/* Trying to pin first makes error handling easier. */
>  	if (ring == &dev_priv->ring[RCS]) {
>  		ret = i915_gem_obj_ggtt_pin(to->obj,
> -					    get_context_alignment(ring->dev),
> -					    false, false);
> +					    get_context_alignment(ring->dev), 0);
>  		if (ret)
>  			return ret;
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index 54413ed46a46..fed72a67afd2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -48,14 +48,14 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
>  int
>  i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
>  			 int min_size, unsigned alignment, unsigned cache_level,
> -			 bool mappable, bool nonblocking)
> +			 unsigned flags)
>  {
>  	drm_i915_private_t *dev_priv = dev->dev_private;
>  	struct list_head eviction_list, unwind_list;
>  	struct i915_vma *vma;
>  	int ret = 0;
>  
> -	trace_i915_gem_evict(dev, min_size, alignment, mappable);
> +	trace_i915_gem_evict(dev, min_size, alignment, flags);
>  
>  	/*
>  	 * The goal is to evict objects and amalgamate space in LRU order.
> @@ -81,7 +81,7 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
>  	 */
>  
>  	INIT_LIST_HEAD(&unwind_list);
> -	if (mappable) {
> +	if (flags & PIN_MAPPABLE) {
>  		BUG_ON(!i915_is_ggtt(vm));
>  		drm_mm_init_scan_with_range(&vm->mm, min_size,
>  					    alignment, cache_level, 0,
> @@ -96,7 +96,7 @@ search_again:
>  			goto found;
>  	}
>  
> -	if (nonblocking)
> +	if (flags & PIN_NONBLOCK)
>  		goto none;
>  
>  	/* Now merge in the soon-to-be-expired objects... */
> @@ -120,13 +120,13 @@ none:
>  	/* Can we unpin some objects such as idle hw contents,
>  	 * or pending flips?
>  	 */
> -	ret = nonblocking ? -ENOSPC : i915_gpu_idle(dev);
> +	ret = flags & PIN_NONBLOCK ? -ENOSPC : i915_gpu_idle(dev);
>  	if (ret)
>  		return ret;
>  
>  	/* Only idle the GPU and repeat the search once */
>  	i915_gem_retire_requests(dev);
> -	nonblocking = true;
> +	flags |= PIN_NONBLOCK;
>  	goto search_again;
>  
>  found:
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index bbff8f9b6549..cd09d42f507b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -544,19 +544,23 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  	struct drm_i915_gem_object *obj = vma->obj;
>  	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
>  	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
> -	bool need_fence, need_mappable;
> -	u32 flags = (entry->flags & EXEC_OBJECT_NEEDS_GTT) &&
> -		!vma->obj->has_global_gtt_mapping ? GLOBAL_BIND : 0;
> +	bool need_fence;
> +	unsigned flags;
>  	int ret;
>  
> +	flags = 0;
> +
>  	need_fence =
>  		has_fenced_gpu_access &&
>  		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
>  		obj->tiling_mode != I915_TILING_NONE;
> -	need_mappable = need_fence || need_reloc_mappable(vma);
> +	if (need_fence || need_reloc_mappable(vma))
> +		flags |= PIN_MAPPABLE;
>  
> -	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, need_mappable,
> -				  false);
> +	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
> +		flags |= PIN_GLOBAL;
> +
> +	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
>  	if (ret)
>  		return ret;
>  
> @@ -585,8 +589,6 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
>  	}
>  
> -	vma->bind_vma(vma, obj->cache_level, flags);
> -
>  	return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 998f9a0b322a..d113eb5e2f5b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -865,7 +865,7 @@ alloc:
>  	if (ret == -ENOSPC && !retried) {
>  		ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
>  					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
> -					       I915_CACHE_NONE, false, true);
> +					       I915_CACHE_NONE, 0);

You sure you want to stick this behavioral change in here?

>  		if (ret)
>  			return ret;
>  
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index 6e580c98dede..b95a380958db 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -34,15 +34,15 @@ TRACE_EVENT(i915_gem_object_create,
>  );
>  
>  TRACE_EVENT(i915_vma_bind,
> -	    TP_PROTO(struct i915_vma *vma, bool mappable),
> -	    TP_ARGS(vma, mappable),
> +	    TP_PROTO(struct i915_vma *vma, unsigned flags),
> +	    TP_ARGS(vma, flags),
>  
>  	    TP_STRUCT__entry(
>  			     __field(struct drm_i915_gem_object *, obj)
>  			     __field(struct i915_address_space *, vm)
>  			     __field(u32, offset)
>  			     __field(u32, size)
> -			     __field(bool, mappable)
> +			     __field(unsigned, flags)
>  			     ),
>  
>  	    TP_fast_assign(
> @@ -50,12 +50,12 @@ TRACE_EVENT(i915_vma_bind,
>  			   __entry->vm = vma->vm;
>  			   __entry->offset = vma->node.start;
>  			   __entry->size = vma->node.size;
> -			   __entry->mappable = mappable;
> +			   __entry->flags = flags;
>  			   ),
>  
>  	    TP_printk("obj=%p, offset=%08x size=%x%s vm=%p",
>  		      __entry->obj, __entry->offset, __entry->size,
> -		      __entry->mappable ? ", mappable" : "",
> +		      __entry->flags & PIN_MAPPABLE ? ", mappable" : "",
>  		      __entry->vm)
>  );
>  
> @@ -196,26 +196,26 @@ DEFINE_EVENT(i915_gem_object, i915_gem_object_destroy,
>  );
>  
>  TRACE_EVENT(i915_gem_evict,
> -	    TP_PROTO(struct drm_device *dev, u32 size, u32 align, bool mappable),
> -	    TP_ARGS(dev, size, align, mappable),
> +	    TP_PROTO(struct drm_device *dev, u32 size, u32 align, unsigned flags),
> +	    TP_ARGS(dev, size, align, flags),
>  
>  	    TP_STRUCT__entry(
>  			     __field(u32, dev)
>  			     __field(u32, size)
>  			     __field(u32, align)
> -			     __field(bool, mappable)
> +			     __field(unsigned, flags)
>  			    ),
>  
>  	    TP_fast_assign(
>  			   __entry->dev = dev->primary->index;
>  			   __entry->size = size;
>  			   __entry->align = align;
> -			   __entry->mappable = mappable;
> +			   __entry->flags = flags;
>  			  ),
>  
>  	    TP_printk("dev=%d, size=%d, align=%d %s",
>  		      __entry->dev, __entry->size, __entry->align,
> -		      __entry->mappable ? ", mappable" : "")
> +		      __entry->flags & PIN_MAPPABLE ? ", mappable" : "")
>  );
>  
>  TRACE_EVENT(i915_gem_evict_everything,
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index a1397b1646af..1d46608bbd8b 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -1349,7 +1349,7 @@ void intel_setup_overlay(struct drm_device *dev)
>  		}
>  		overlay->flip_addr = reg_bo->phys_obj->handle->busaddr;
>  	} else {
> -		ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, true, false);
> +		ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, PIN_MAPPABLE);
>  		if (ret) {
>  			DRM_ERROR("failed to pin overlay register bo\n");
>  			goto out_free_bo;
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 469170c11bb4..e29804eda113 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -2753,7 +2753,7 @@ intel_alloc_context_page(struct drm_device *dev)
>  		return NULL;
>  	}
>  
> -	ret = i915_gem_obj_ggtt_pin(ctx, 4096, true, false);
> +	ret = i915_gem_obj_ggtt_pin(ctx, 4096, 0);

and here?

>  	if (ret) {
>  		DRM_ERROR("failed to pin power context: %d\n", ret);
>  		goto err_unref;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 442c9a66ebab..5744841669e4 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -531,9 +531,11 @@ init_pipe_control(struct intel_ring_buffer *ring)
>  		goto err;
>  	}
>  
> -	i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
> +	ret = i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
> +	if (ret)
> +		goto err_unref;
>  
> -	ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, true, false);
> +	ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0);
>  	if (ret)
>  		goto err_unref;

and here?

>  
> @@ -1269,12 +1271,13 @@ static int init_status_page(struct intel_ring_buffer *ring)
>  		goto err;
>  	}
>  
> -	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
> +	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
> +	if (ret)
> +		goto err_unref;
>  
> -	ret = i915_gem_obj_ggtt_pin(obj, 4096, true, false);
> -	if (ret != 0) {
> +	ret = i915_gem_obj_ggtt_pin(obj, 4096, 0);
> +	if (ret)
>  		goto err_unref;
> -	}

and here?

>  
>  	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
>  	ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
> @@ -1354,7 +1357,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
>  
>  	ring->obj = obj;
>  
> -	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, true, false);
> +	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
>  	if (ret)
>  		goto err_unref;
>  
> @@ -1927,7 +1930,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>  			return -ENOMEM;
>  		}
>  
> -		ret = i915_gem_obj_ggtt_pin(obj, 0, true, false);
> +		ret = i915_gem_obj_ggtt_pin(obj, 0, 0);

and here?

>  		if (ret != 0) {
>  			drm_gem_object_unreference(&obj->base);
>  			DRM_ERROR("Failed to ping batch bo\n");
> -- 
> 1.8.5.2
> 

I think it would have been better to do a prep patch with all m&f ->
flags changes - and then the actual fixes. And all the changes for
the mappable/fenceable attributes in a separate patch as well.

Past those relatively minor suggestions, lgtm. I'd like to see the patch
merged and run through as much QA/use as possible.

In its current state:
Acked-by: Ben Widawsky <ben@bwidawsk.net>

If you want to break it up a bit, I'll try to turn it into an r-b


-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/3] drm/i915: Only bind each object rather than for every execbuffer
  2014-01-01 21:51 ` [PATCH 1/3] drm/i915: Only bind each object rather than for every execbuffer Ben Widawsky
@ 2014-01-02 10:37   ` Chris Wilson
  0 siblings, 0 replies; 8+ messages in thread
From: Chris Wilson @ 2014-01-02 10:37 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Daniel Vetter, intel-gfx

On Wed, Jan 01, 2014 at 01:51:57PM -0800, Ben Widawsky wrote:
> On Wed, Jan 01, 2014 at 02:00:54PM +0000, Chris Wilson wrote:
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index 998f9a0b322a..d113eb5e2f5b 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -865,7 +865,7 @@ alloc:
> >  	if (ret == -ENOSPC && !retried) {
> >  		ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
> >  					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
> > -					       I915_CACHE_NONE, false, true);
> > +					       I915_CACHE_NONE, 0);
> 
> You sure you want to stick this behavioral change in here?

Yes. Stupid bug that fortunately could not be triggered.

> > -	ret = i915_gem_obj_ggtt_pin(ctx, 4096, true, false);
> > +	ret = i915_gem_obj_ggtt_pin(ctx, 4096, 0);
> 
> and here?
> 
> >  	if (ret) {
> >  		DRM_ERROR("failed to pin power context: %d\n", ret);
> >  		goto err_unref;
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > index 442c9a66ebab..5744841669e4 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > @@ -531,9 +531,11 @@ init_pipe_control(struct intel_ring_buffer *ring)
> >  		goto err;
> >  	}
> >  
> > -	i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
> > +	ret = i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
> > +	if (ret)
> > +		goto err_unref;
> >  
> > -	ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, true, false);
> > +	ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0);
> >  	if (ret)
> >  		goto err_unref;
> 
> and here?
> 
> >  
> > @@ -1269,12 +1271,13 @@ static int init_status_page(struct intel_ring_buffer *ring)
> >  		goto err;
> >  	}
> >  
> > -	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
> > +	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
> > +	if (ret)
> > +		goto err_unref;
> >  
> > -	ret = i915_gem_obj_ggtt_pin(obj, 4096, true, false);
> > -	if (ret != 0) {
> > +	ret = i915_gem_obj_ggtt_pin(obj, 4096, 0);
> > +	if (ret)
> >  		goto err_unref;
> > -	}
> 
> and here?
> 
> >  
> >  	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
> >  	ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
> > @@ -1354,7 +1357,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
> >  
> >  	ring->obj = obj;
> >  
> > -	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, true, false);
> > +	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
> >  	if (ret)
> >  		goto err_unref;
> >  
> > @@ -1927,7 +1930,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
> >  			return -ENOMEM;
> >  		}
> >  
> > -		ret = i915_gem_obj_ggtt_pin(obj, 0, true, false);
> > +		ret = i915_gem_obj_ggtt_pin(obj, 0, 0);
> 
> and here?

YES!
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/3] drm/i915: Only bind each object rather than for every execbuffer
  2014-01-01 14:00 [PATCH 1/3] drm/i915: Only bind each object rather than for every execbuffer Chris Wilson
                   ` (2 preceding siblings ...)
  2014-01-01 21:51 ` [PATCH 1/3] drm/i915: Only bind each object rather than for every execbuffer Ben Widawsky
@ 2014-01-07  7:43 ` Daniel Vetter
  2014-01-08 10:30   ` Chris Wilson
  3 siblings, 1 reply; 8+ messages in thread
From: Daniel Vetter @ 2014-01-07  7:43 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Daniel Vetter, intel-gfx, Ben Widawsky

On Wed, Jan 01, 2014 at 02:00:54PM +0000, Chris Wilson wrote:
> One side-effect of the introduction of ppgtt was that we needed to
> rebind the object into the appropriate vm (and global gtt in some
> peculiar cases). For simplicity this was done twice for every object on
> every call to execbuffer. However, that adds a tremendous amount of CPU
> overhead (rewriting all the PTE for all objects into WC memory) per
> draw. The fix is to push all the decision about which vm to bind into
> and when down into the low-level bind routines through hints rather than
> inside the execbuffer routine.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=72906
> Tested-by: jianx.zhou@intel.com
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>

Can you please split out the code cleanups into a separate patch? I like
them, but as is they're hiding the actual bugfix in the diff quite badly
imo.

Thanks, Daniel
> ---
>  drivers/gpu/drm/i915/i915_drv.h            |  25 +++---
>  drivers/gpu/drm/i915/i915_gem.c            | 119 +++++++++++------------------
>  drivers/gpu/drm/i915/i915_gem_context.c    |   9 +--
>  drivers/gpu/drm/i915/i915_gem_evict.c      |  12 +--
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  18 +++--
>  drivers/gpu/drm/i915/i915_gem_gtt.c        |   2 +-
>  drivers/gpu/drm/i915/i915_trace.h          |  20 ++---
>  drivers/gpu/drm/i915/intel_overlay.c       |   2 +-
>  drivers/gpu/drm/i915/intel_pm.c            |   2 +-
>  drivers/gpu/drm/i915/intel_ringbuffer.c    |  19 +++--
>  10 files changed, 103 insertions(+), 125 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index cc8afff878b1..65379474f312 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2033,11 +2033,11 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
>  int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  				     struct i915_address_space *vm,
>  				     uint32_t alignment,
> -				     bool map_and_fenceable,
> -				     bool nonblocking);
> -void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj);
> +				     unsigned flags);
> +#define PIN_MAPPABLE 0x1
> +#define PIN_GLOBAL 0x2
> +#define PIN_NONBLOCK 0x4
>  int __must_check i915_vma_unbind(struct i915_vma *vma);
> -int __must_check i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj);
>  int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
>  void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
>  void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
> @@ -2237,13 +2237,19 @@ i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj)
>  static inline int __must_check
>  i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
>  		      uint32_t alignment,
> -		      bool map_and_fenceable,
> -		      bool nonblocking)
> +		      unsigned flags)
>  {
> -	return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment,
> -				   map_and_fenceable, nonblocking);
> +	return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment, flags | PIN_GLOBAL);
>  }
>  
> +static inline int
> +i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj)
> +{
> +	return i915_vma_unbind(i915_gem_obj_to_ggtt(obj));
> +}
> +
> +void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj);
> +
>  /* i915_gem_context.c */
>  #define ctx_to_ppgtt(ctx) container_of((ctx)->vm, struct i915_hw_ppgtt, base)
>  int __must_check i915_gem_context_init(struct drm_device *dev);
> @@ -2280,8 +2286,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
>  					  int min_size,
>  					  unsigned alignment,
>  					  unsigned cache_level,
> -					  bool mappable,
> -					  bool nonblock);
> +					  unsigned flags);
>  int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
>  int i915_gem_evict_everything(struct drm_device *dev);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 656406deada0..d9acfa78d1a5 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -43,12 +43,6 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *o
>  static __must_check int
>  i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
>  			       bool readonly);
> -static __must_check int
> -i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> -			   struct i915_address_space *vm,
> -			   unsigned alignment,
> -			   bool map_and_fenceable,
> -			   bool nonblocking);
>  static int i915_gem_phys_pwrite(struct drm_device *dev,
>  				struct drm_i915_gem_object *obj,
>  				struct drm_i915_gem_pwrite *args,
> @@ -605,7 +599,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
>  	char __user *user_data;
>  	int page_offset, page_length, ret;
>  
> -	ret = i915_gem_obj_ggtt_pin(obj, 0, true, true);
> +	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
>  	if (ret)
>  		goto out;
>  
> @@ -1399,7 +1393,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  	}
>  
>  	/* Now bind it into the GTT if needed */
> -	ret = i915_gem_obj_ggtt_pin(obj,  0, true, false);
> +	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
>  	if (ret)
>  		goto unlock;
>  
> @@ -2767,7 +2761,6 @@ int i915_vma_unbind(struct i915_vma *vma)
>  
>  	if (!drm_mm_node_allocated(&vma->node)) {
>  		i915_gem_vma_destroy(vma);
> -
>  		return 0;
>  	}
>  
> @@ -2819,26 +2812,6 @@ int i915_vma_unbind(struct i915_vma *vma)
>  	return 0;
>  }
>  
> -/**
> - * Unbinds an object from the global GTT aperture.
> - */
> -int
> -i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj)
> -{
> -	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> -	struct i915_address_space *ggtt = &dev_priv->gtt.base;
> -
> -	if (!i915_gem_obj_ggtt_bound(obj))
> -		return 0;
> -
> -	if (i915_gem_obj_to_ggtt(obj)->pin_count)
> -		return -EBUSY;
> -
> -	BUG_ON(obj->pages == NULL);
> -
> -	return i915_vma_unbind(i915_gem_obj_to_vma(obj, ggtt));
> -}
> -
>  int i915_gpu_idle(struct drm_device *dev)
>  {
>  	drm_i915_private_t *dev_priv = dev->dev_private;
> @@ -3256,18 +3229,17 @@ static void i915_gem_verify_gtt(struct drm_device *dev)
>  /**
>   * Finds free space in the GTT aperture and binds the object there.
>   */
> -static int
> +static struct i915_vma *
>  i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  			   struct i915_address_space *vm,
>  			   unsigned alignment,
> -			   bool map_and_fenceable,
> -			   bool nonblocking)
> +			   unsigned flags)
>  {
>  	struct drm_device *dev = obj->base.dev;
>  	drm_i915_private_t *dev_priv = dev->dev_private;
>  	u32 size, fence_size, fence_alignment, unfenced_alignment;
>  	size_t gtt_max =
> -		map_and_fenceable ? dev_priv->gtt.mappable_end : vm->total;
> +		flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
>  	struct i915_vma *vma;
>  	int ret;
>  
> @@ -3279,18 +3251,18 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  						     obj->tiling_mode, true);
>  	unfenced_alignment =
>  		i915_gem_get_gtt_alignment(dev,
> -						    obj->base.size,
> -						    obj->tiling_mode, false);
> +					   obj->base.size,
> +					   obj->tiling_mode, false);
>  
>  	if (alignment == 0)
> -		alignment = map_and_fenceable ? fence_alignment :
> +		alignment = flags & PIN_MAPPABLE ? fence_alignment :
>  						unfenced_alignment;
> -	if (map_and_fenceable && alignment & (fence_alignment - 1)) {
> +	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
>  		DRM_ERROR("Invalid object alignment requested %u\n", alignment);
> -		return -EINVAL;
> +		return ERR_PTR(-EINVAL);
>  	}
>  
> -	size = map_and_fenceable ? fence_size : obj->base.size;
> +	size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
>  
>  	/* If the object is bigger than the entire aperture, reject it early
>  	 * before evicting everything in a vain attempt to find space.
> @@ -3298,22 +3270,20 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  	if (obj->base.size > gtt_max) {
>  		DRM_ERROR("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%zu\n",
>  			  obj->base.size,
> -			  map_and_fenceable ? "mappable" : "total",
> +			  flags & PIN_MAPPABLE ? "mappable" : "total",
>  			  gtt_max);
> -		return -E2BIG;
> +		return ERR_PTR(-E2BIG);
>  	}
>  
>  	ret = i915_gem_object_get_pages(obj);
>  	if (ret)
> -		return ret;
> +		return ERR_PTR(ret);
>  
>  	i915_gem_object_pin_pages(obj);
>  
>  	vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
> -	if (IS_ERR(vma)) {
> -		ret = PTR_ERR(vma);
> +	if (IS_ERR(vma))
>  		goto err_unpin;
> -	}
>  
>  search_free:
>  	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> @@ -3322,9 +3292,7 @@ search_free:
>  						  DRM_MM_SEARCH_DEFAULT);
>  	if (ret) {
>  		ret = i915_gem_evict_something(dev, vm, size, alignment,
> -					       obj->cache_level,
> -					       map_and_fenceable,
> -					       nonblocking);
> +					       obj->cache_level, flags);
>  		if (ret == 0)
>  			goto search_free;
>  
> @@ -3355,19 +3323,23 @@ search_free:
>  		obj->map_and_fenceable = mappable && fenceable;
>  	}
>  
> -	WARN_ON(map_and_fenceable && !obj->map_and_fenceable);
> +	WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
> +
> +	trace_i915_vma_bind(vma, flags);
> +	vma->bind_vma(vma, obj->cache_level,
> +		      flags & (PIN_MAPPABLE | PIN_GLOBAL) ? GLOBAL_BIND : 0);
>  
> -	trace_i915_vma_bind(vma, map_and_fenceable);
>  	i915_gem_verify_gtt(dev);
> -	return 0;
> +	return vma;
>  
>  err_remove_node:
>  	drm_mm_remove_node(&vma->node);
>  err_free_vma:
>  	i915_gem_vma_destroy(vma);
> +	vma = ERR_PTR(ret);
>  err_unpin:
>  	i915_gem_object_unpin_pages(obj);
> -	return ret;
> +	return vma;
>  }
>  
>  bool
> @@ -3559,7 +3531,9 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>  		}
>  
>  		list_for_each_entry(vma, &obj->vma_list, vma_link)
> -			vma->bind_vma(vma, cache_level, 0);
> +			if (drm_mm_node_allocated(&vma->node))
> +				vma->bind_vma(vma, cache_level,
> +					      obj->has_global_gtt_mapping ? GLOBAL_BIND : 0);
>  	}
>  
>  	list_for_each_entry(vma, &obj->vma_list, vma_link)
> @@ -3728,7 +3702,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  	 * (e.g. libkms for the bootup splash), we have to ensure that we
>  	 * always use map_and_fenceable for all scanout buffers.
>  	 */
> -	ret = i915_gem_obj_ggtt_pin(obj, alignment, true, false);
> +	ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE);
>  	if (ret)
>  		goto err_unpin_display;
>  
> @@ -3884,52 +3858,49 @@ int
>  i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  		    struct i915_address_space *vm,
>  		    uint32_t alignment,
> -		    bool map_and_fenceable,
> -		    bool nonblocking)
> +		    unsigned flags)
>  {
> -	const u32 flags = map_and_fenceable ? GLOBAL_BIND : 0;
>  	struct i915_vma *vma;
>  	int ret;
>  
> -	WARN_ON(map_and_fenceable && !i915_is_ggtt(vm));
> +	if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
> +		return -EINVAL;
>  
>  	vma = i915_gem_obj_to_vma(obj, vm);
> -
>  	if (vma) {
>  		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
>  			return -EBUSY;
>  
>  		if ((alignment &&
>  		     vma->node.start & (alignment - 1)) ||
> -		    (map_and_fenceable && !obj->map_and_fenceable)) {
> +		    (flags & PIN_MAPPABLE && !obj->map_and_fenceable)) {
>  			WARN(vma->pin_count,
>  			     "bo is already pinned with incorrect alignment:"
>  			     " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d,"
>  			     " obj->map_and_fenceable=%d\n",
>  			     i915_gem_obj_offset(obj, vm), alignment,
> -			     map_and_fenceable,
> +			     flags & PIN_MAPPABLE,
>  			     obj->map_and_fenceable);
>  			ret = i915_vma_unbind(vma);
>  			if (ret)
>  				return ret;
> +
> +			vma = NULL;
>  		}
>  	}
>  
> -	if (!i915_gem_obj_bound(obj, vm)) {
> -		ret = i915_gem_object_bind_to_vm(obj, vm, alignment,
> -						 map_and_fenceable,
> -						 nonblocking);
> -		if (ret)
> -			return ret;
> -
> +	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
> +		vma = i915_gem_object_bind_to_vm(obj, vm, alignment, flags);
> +		if (IS_ERR(vma))
> +			return PTR_ERR(vma);
>  	}
>  
> -	vma = i915_gem_obj_to_vma(obj, vm);
> -
> -	vma->bind_vma(vma, obj->cache_level, flags);
> +	if (flags & PIN_GLOBAL && !obj->has_global_gtt_mapping)
> +		vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
>  
> -	i915_gem_obj_to_vma(obj, vm)->pin_count++;
> -	obj->pin_mappable |= map_and_fenceable;
> +	vma->pin_count++;
> +	if (flags & PIN_MAPPABLE)
> +		obj->pin_mappable |= true;
>  
>  	return 0;
>  }
> @@ -3987,7 +3958,7 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
>  	}
>  
>  	if (obj->user_pin_count == 0) {
> -		ret = i915_gem_obj_ggtt_pin(obj, args->alignment, true, false);
> +		ret = i915_gem_obj_ggtt_pin(obj, args->alignment, PIN_MAPPABLE);
>  		if (ret)
>  			goto out;
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index ebe0f67eac08..b428b101f9d0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -282,8 +282,7 @@ i915_gem_create_context(struct drm_device *dev,
>  			 * context.
>  			 */
>  			ret = i915_gem_obj_ggtt_pin(ctx->obj,
> -						    get_context_alignment(dev),
> -						    false, false);
> +						    get_context_alignment(dev), 0);
>  			if (ret) {
>  				DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret);
>  				goto err_destroy;
> @@ -336,8 +335,7 @@ void i915_gem_context_reset(struct drm_device *dev)
>  
>  		if (i == RCS) {
>  			WARN_ON(i915_gem_obj_ggtt_pin(dctx->obj,
> -						      get_context_alignment(dev),
> -						      false, false));
> +						      get_context_alignment(dev), 0));
>  			/* Fake a finish/inactive */
>  			dctx->obj->base.write_domain = 0;
>  			dctx->obj->active = 0;
> @@ -600,8 +598,7 @@ static int do_switch(struct intel_ring_buffer *ring,
>  	/* Trying to pin first makes error handling easier. */
>  	if (ring == &dev_priv->ring[RCS]) {
>  		ret = i915_gem_obj_ggtt_pin(to->obj,
> -					    get_context_alignment(ring->dev),
> -					    false, false);
> +					    get_context_alignment(ring->dev), 0);
>  		if (ret)
>  			return ret;
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index 54413ed46a46..fed72a67afd2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -48,14 +48,14 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
>  int
>  i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
>  			 int min_size, unsigned alignment, unsigned cache_level,
> -			 bool mappable, bool nonblocking)
> +			 unsigned flags)
>  {
>  	drm_i915_private_t *dev_priv = dev->dev_private;
>  	struct list_head eviction_list, unwind_list;
>  	struct i915_vma *vma;
>  	int ret = 0;
>  
> -	trace_i915_gem_evict(dev, min_size, alignment, mappable);
> +	trace_i915_gem_evict(dev, min_size, alignment, flags);
>  
>  	/*
>  	 * The goal is to evict objects and amalgamate space in LRU order.
> @@ -81,7 +81,7 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
>  	 */
>  
>  	INIT_LIST_HEAD(&unwind_list);
> -	if (mappable) {
> +	if (flags & PIN_MAPPABLE) {
>  		BUG_ON(!i915_is_ggtt(vm));
>  		drm_mm_init_scan_with_range(&vm->mm, min_size,
>  					    alignment, cache_level, 0,
> @@ -96,7 +96,7 @@ search_again:
>  			goto found;
>  	}
>  
> -	if (nonblocking)
> +	if (flags & PIN_NONBLOCK)
>  		goto none;
>  
>  	/* Now merge in the soon-to-be-expired objects... */
> @@ -120,13 +120,13 @@ none:
>  	/* Can we unpin some objects such as idle hw contents,
>  	 * or pending flips?
>  	 */
> -	ret = nonblocking ? -ENOSPC : i915_gpu_idle(dev);
> +	ret = flags & PIN_NONBLOCK ? -ENOSPC : i915_gpu_idle(dev);
>  	if (ret)
>  		return ret;
>  
>  	/* Only idle the GPU and repeat the search once */
>  	i915_gem_retire_requests(dev);
> -	nonblocking = true;
> +	flags |= PIN_NONBLOCK;
>  	goto search_again;
>  
>  found:
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index bbff8f9b6549..cd09d42f507b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -544,19 +544,23 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  	struct drm_i915_gem_object *obj = vma->obj;
>  	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
>  	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
> -	bool need_fence, need_mappable;
> -	u32 flags = (entry->flags & EXEC_OBJECT_NEEDS_GTT) &&
> -		!vma->obj->has_global_gtt_mapping ? GLOBAL_BIND : 0;
> +	bool need_fence;
> +	unsigned flags;
>  	int ret;
>  
> +	flags = 0;
> +
>  	need_fence =
>  		has_fenced_gpu_access &&
>  		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
>  		obj->tiling_mode != I915_TILING_NONE;
> -	need_mappable = need_fence || need_reloc_mappable(vma);
> +	if (need_fence || need_reloc_mappable(vma))
> +		flags |= PIN_MAPPABLE;
>  
> -	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, need_mappable,
> -				  false);
> +	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
> +		flags |= PIN_GLOBAL;
> +
> +	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
>  	if (ret)
>  		return ret;
>  
> @@ -585,8 +589,6 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
>  	}
>  
> -	vma->bind_vma(vma, obj->cache_level, flags);
> -
>  	return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 998f9a0b322a..d113eb5e2f5b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -865,7 +865,7 @@ alloc:
>  	if (ret == -ENOSPC && !retried) {
>  		ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
>  					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
> -					       I915_CACHE_NONE, false, true);
> +					       I915_CACHE_NONE, 0);
>  		if (ret)
>  			return ret;
>  
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index 6e580c98dede..b95a380958db 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -34,15 +34,15 @@ TRACE_EVENT(i915_gem_object_create,
>  );
>  
>  TRACE_EVENT(i915_vma_bind,
> -	    TP_PROTO(struct i915_vma *vma, bool mappable),
> -	    TP_ARGS(vma, mappable),
> +	    TP_PROTO(struct i915_vma *vma, unsigned flags),
> +	    TP_ARGS(vma, flags),
>  
>  	    TP_STRUCT__entry(
>  			     __field(struct drm_i915_gem_object *, obj)
>  			     __field(struct i915_address_space *, vm)
>  			     __field(u32, offset)
>  			     __field(u32, size)
> -			     __field(bool, mappable)
> +			     __field(unsigned, flags)
>  			     ),
>  
>  	    TP_fast_assign(
> @@ -50,12 +50,12 @@ TRACE_EVENT(i915_vma_bind,
>  			   __entry->vm = vma->vm;
>  			   __entry->offset = vma->node.start;
>  			   __entry->size = vma->node.size;
> -			   __entry->mappable = mappable;
> +			   __entry->flags = flags;
>  			   ),
>  
>  	    TP_printk("obj=%p, offset=%08x size=%x%s vm=%p",
>  		      __entry->obj, __entry->offset, __entry->size,
> -		      __entry->mappable ? ", mappable" : "",
> +		      __entry->flags & PIN_MAPPABLE ? ", mappable" : "",
>  		      __entry->vm)
>  );
>  
> @@ -196,26 +196,26 @@ DEFINE_EVENT(i915_gem_object, i915_gem_object_destroy,
>  );
>  
>  TRACE_EVENT(i915_gem_evict,
> -	    TP_PROTO(struct drm_device *dev, u32 size, u32 align, bool mappable),
> -	    TP_ARGS(dev, size, align, mappable),
> +	    TP_PROTO(struct drm_device *dev, u32 size, u32 align, unsigned flags),
> +	    TP_ARGS(dev, size, align, flags),
>  
>  	    TP_STRUCT__entry(
>  			     __field(u32, dev)
>  			     __field(u32, size)
>  			     __field(u32, align)
> -			     __field(bool, mappable)
> +			     __field(unsigned, flags)
>  			    ),
>  
>  	    TP_fast_assign(
>  			   __entry->dev = dev->primary->index;
>  			   __entry->size = size;
>  			   __entry->align = align;
> -			   __entry->mappable = mappable;
> +			   __entry->flags = flags;
>  			  ),
>  
>  	    TP_printk("dev=%d, size=%d, align=%d %s",
>  		      __entry->dev, __entry->size, __entry->align,
> -		      __entry->mappable ? ", mappable" : "")
> +		      __entry->flags & PIN_MAPPABLE ? ", mappable" : "")
>  );
>  
>  TRACE_EVENT(i915_gem_evict_everything,
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index a1397b1646af..1d46608bbd8b 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -1349,7 +1349,7 @@ void intel_setup_overlay(struct drm_device *dev)
>  		}
>  		overlay->flip_addr = reg_bo->phys_obj->handle->busaddr;
>  	} else {
> -		ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, true, false);
> +		ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, PIN_MAPPABLE);
>  		if (ret) {
>  			DRM_ERROR("failed to pin overlay register bo\n");
>  			goto out_free_bo;
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 469170c11bb4..e29804eda113 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -2753,7 +2753,7 @@ intel_alloc_context_page(struct drm_device *dev)
>  		return NULL;
>  	}
>  
> -	ret = i915_gem_obj_ggtt_pin(ctx, 4096, true, false);
> +	ret = i915_gem_obj_ggtt_pin(ctx, 4096, 0);
>  	if (ret) {
>  		DRM_ERROR("failed to pin power context: %d\n", ret);
>  		goto err_unref;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 442c9a66ebab..5744841669e4 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -531,9 +531,11 @@ init_pipe_control(struct intel_ring_buffer *ring)
>  		goto err;
>  	}
>  
> -	i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
> +	ret = i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
> +	if (ret)
> +		goto err_unref;
>  
> -	ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, true, false);
> +	ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0);
>  	if (ret)
>  		goto err_unref;
>  
> @@ -1269,12 +1271,13 @@ static int init_status_page(struct intel_ring_buffer *ring)
>  		goto err;
>  	}
>  
> -	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
> +	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
> +	if (ret)
> +		goto err_unref;
>  
> -	ret = i915_gem_obj_ggtt_pin(obj, 4096, true, false);
> -	if (ret != 0) {
> +	ret = i915_gem_obj_ggtt_pin(obj, 4096, 0);
> +	if (ret)
>  		goto err_unref;
> -	}
>  
>  	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
>  	ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
> @@ -1354,7 +1357,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
>  
>  	ring->obj = obj;
>  
> -	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, true, false);
> +	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
>  	if (ret)
>  		goto err_unref;
>  
> @@ -1927,7 +1930,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>  			return -ENOMEM;
>  		}
>  
> -		ret = i915_gem_obj_ggtt_pin(obj, 0, true, false);
> +		ret = i915_gem_obj_ggtt_pin(obj, 0, 0);
>  		if (ret != 0) {
>  			drm_gem_object_unreference(&obj->base);
>  			DRM_ERROR("Failed to ping batch bo\n");
> -- 
> 1.8.5.2
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/3] drm/i915: Only bind each object rather than for every execbuffer
  2014-01-07  7:43 ` Daniel Vetter
@ 2014-01-08 10:30   ` Chris Wilson
  2014-01-08 10:38     ` Daniel Vetter
  0 siblings, 1 reply; 8+ messages in thread
From: Chris Wilson @ 2014-01-08 10:30 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Daniel Vetter, intel-gfx, Ben Widawsky

On Tue, Jan 07, 2014 at 08:43:28AM +0100, Daniel Vetter wrote:
> On Wed, Jan 01, 2014 at 02:00:54PM +0000, Chris Wilson wrote:
> > One side-effect of the introduction of ppgtt was that we needed to
> > rebind the object into the appropriate vm (and global gtt in some
> > peculiar cases). For simplicity this was done twice for every object on
> > every call to execbuffer. However, that adds a tremendous amount of CPU
> > overhead (rewriting all the PTE for all objects into WC memory) per
> > draw. The fix is to push all the decision about which vm to bind into
> > and when down into the low-level bind routines through hints rather than
> > inside the execbuffer routine.
> > 
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=72906
> > Tested-by: jianx.zhou@intel.com
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> 
> Can you please split out the code cleanups into a separate patch? I like
> them, but as is they're hiding the actual bugfix in the diff quite badly
> imo.

Which part of this is cleanup?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/3] drm/i915: Only bind each object rather than for every execbuffer
  2014-01-08 10:30   ` Chris Wilson
@ 2014-01-08 10:38     ` Daniel Vetter
  0 siblings, 0 replies; 8+ messages in thread
From: Daniel Vetter @ 2014-01-08 10:38 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, intel-gfx, Ben Widawsky,
	Daniel Vetter

On Wed, Jan 08, 2014 at 10:30:16AM +0000, Chris Wilson wrote:
> On Tue, Jan 07, 2014 at 08:43:28AM +0100, Daniel Vetter wrote:
> > On Wed, Jan 01, 2014 at 02:00:54PM +0000, Chris Wilson wrote:
> > > One side-effect of the introduction of ppgtt was that we needed to
> > > rebind the object into the appropriate vm (and global gtt in some
> > > peculiar cases). For simplicity this was done twice for every object on
> > > every call to execbuffer. However, that adds a tremendous amount of CPU
> > > overhead (rewriting all the PTE for all objects into WC memory) per
> > > draw. The fix is to push all the decision about which vm to bind into
> > > and when down into the low-level bind routines through hints rather than
> > > inside the execbuffer routine.
> > > 
> > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=72906
> > > Tested-by: jianx.zhou@intel.com
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> > > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > 
> > Can you please split out the code cleanups into a separate patch? I like
> > them, but as is they're hiding the actual bugfix in the diff quite badly
> > imo.
> 
> Which part of this is cleanup?

Coalescing the two boolean parameters mappable and nonblocking into the
flags parameter in a bunch of bind/pin functions and the return value
change for bind_to_vm look like prep cleanup. And there's 1-2 whitespace
fixup hunks in there. Splitting that out as a prep patch will make the
actual fix stick out much more I think.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2014-01-08 10:37 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-01-01 14:00 [PATCH 1/3] drm/i915: Only bind each object rather than for every execbuffer Chris Wilson
2014-01-01 14:00 ` [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt Chris Wilson
2014-01-01 14:00 ` [PATCH 3/3] drm/i915: Remove redundant list_empty(eb->vmas) tests in execbuffer Chris Wilson
2014-01-01 21:51 ` [PATCH 1/3] drm/i915: Only bind each object rather than for every execbuffer Ben Widawsky
2014-01-02 10:37   ` Chris Wilson
2014-01-07  7:43 ` Daniel Vetter
2014-01-08 10:30   ` Chris Wilson
2014-01-08 10:38     ` Daniel Vetter

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.