intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs
@ 2017-08-16  8:52 Chris Wilson
  2017-08-16  8:52 ` [PATCH 2/7] drm/i915: Check context status before looking up our obj/vma Chris Wilson
                   ` (8 more replies)
  0 siblings, 9 replies; 19+ messages in thread
From: Chris Wilson @ 2017-08-16  8:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: drm-intel-fixes

MI_STORE_DWORD_IMM just doesn't work on the video decode engine under
Sandybridge, so refrain from using it. Then switch the selftests over to
using the now common test prior to using MI_STORE_DWORD_IMM.

Fixes: 7dd4f6729f92 ("drm/i915: Async GPU relocation processing")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <drm-intel-fixes@lists.freedesktop.org> # v4.13-rc1+
---
 drivers/gpu/drm/i915/i915_drv.h                     |  7 +++++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c          |  8 ++++----
 drivers/gpu/drm/i915/i915_selftest.h                |  2 --
 drivers/gpu/drm/i915/intel_ringbuffer.h             | 12 ++++++++++++
 drivers/gpu/drm/i915/selftests/i915_gem_coherency.c |  2 +-
 drivers/gpu/drm/i915/selftests/i915_gem_context.c   |  6 ++++--
 drivers/gpu/drm/i915/selftests/intel_hangcheck.c    | 18 ++++++++++++------
 7 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6c25c8520c87..620e53bd705a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -4327,4 +4327,11 @@ int remap_io_mapping(struct vm_area_struct *vma,
 		     unsigned long addr, unsigned long pfn, unsigned long size,
 		     struct io_mapping *iomap);
 
+static inline bool
+intel_engine_can_store_dword(struct intel_engine_cs *engine)
+{
+	return __intel_engine_can_store_dword(INTEL_GEN(engine->i915),
+					      engine->class);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 8e8bc7aefd9c..359d5dc6d8df 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1268,7 +1268,9 @@ relocate_entry(struct i915_vma *vma,
 
 	if (!eb->reloc_cache.vaddr &&
 	    (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
-	     !reservation_object_test_signaled_rcu(vma->resv, true))) {
+	     !reservation_object_test_signaled_rcu(vma->resv, true)) &&
+	    __intel_engine_can_store_dword(eb->reloc_cache.gen,
+					   eb->engine->class)) {
 		const unsigned int gen = eb->reloc_cache.gen;
 		unsigned int len;
 		u32 *batch;
@@ -1278,10 +1280,8 @@ relocate_entry(struct i915_vma *vma,
 			len = offset & 7 ? 8 : 5;
 		else if (gen >= 4)
 			len = 4;
-		else if (gen >= 3)
+		else
 			len = 3;
-		else /* On gen2 MI_STORE_DWORD_IMM uses a physical address */
-			goto repeat;
 
 		batch = reloc_gpu(eb, vma, len);
 		if (IS_ERR(batch))
diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h
index 9d7d86f1733d..78e1a1b168ff 100644
--- a/drivers/gpu/drm/i915/i915_selftest.h
+++ b/drivers/gpu/drm/i915/i915_selftest.h
@@ -101,6 +101,4 @@ bool __igt_timeout(unsigned long timeout, const char *fmt, ...);
 #define igt_timeout(t, fmt, ...) \
 	__igt_timeout((t), KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
 
-#define igt_can_mi_store_dword_imm(D) (INTEL_GEN(D) > 2)
-
 #endif /* !__I915_SELFTEST_H__ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index d33c93444c0d..02d8974bf9ab 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -735,4 +735,16 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 void intel_engines_mark_idle(struct drm_i915_private *i915);
 void intel_engines_reset_default_submission(struct drm_i915_private *i915);
 
+static inline bool
+__intel_engine_can_store_dword(unsigned int gen, unsigned int class)
+{
+	if (gen <= 2)
+		return false; /* uses physical not virtual addresses */
+
+	if (gen == 6 && class == VIDEO_DECODE_CLASS)
+		return false; /* b0rked */
+
+	return true;
+}
+
 #endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index 95d4aebc0181..35d778d70626 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -241,7 +241,7 @@ static bool always_valid(struct drm_i915_private *i915)
 
 static bool needs_mi_store_dword(struct drm_i915_private *i915)
 {
-	return igt_can_mi_store_dword_imm(i915);
+	return intel_engine_can_store_dword(i915->engine[RCS]);
 }
 
 static const struct igt_coherency_mode {
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 12b85b3278cd..fb0a58fc8348 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -38,8 +38,6 @@ gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value)
 	u32 *cmd;
 	int err;
 
-	GEM_BUG_ON(!igt_can_mi_store_dword_imm(vma->vm->i915));
-
 	size = (4 * count + 1) * sizeof(u32);
 	size = round_up(size, PAGE_SIZE);
 	obj = i915_gem_object_create_internal(vma->vm->i915, size);
@@ -123,6 +121,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 	int err;
 
 	GEM_BUG_ON(obj->base.size > vm->total);
+	GEM_BUG_ON(!intel_engine_can_store_dword(engine));
 
 	vma = i915_vma_instance(obj, vm, NULL);
 	if (IS_ERR(vma))
@@ -359,6 +358,9 @@ static int igt_ctx_exec(void *arg)
 		}
 
 		for_each_engine(engine, i915, id) {
+			if (!intel_engine_can_store_dword(engine))
+				continue;
+
 			if (!obj) {
 				obj = create_test_object(ctx, file, &objects);
 				if (IS_ERR(obj)) {
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 208b34e864fb..02e52a146ed8 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -253,9 +253,6 @@ static int igt_hang_sanitycheck(void *arg)
 
 	/* Basic check that we can execute our hanging batch */
 
-	if (!igt_can_mi_store_dword_imm(i915))
-		return 0;
-
 	mutex_lock(&i915->drm.struct_mutex);
 	err = hang_init(&h, i915);
 	if (err)
@@ -264,6 +261,9 @@ static int igt_hang_sanitycheck(void *arg)
 	for_each_engine(engine, i915, id) {
 		long timeout;
 
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
 		rq = hang_create_request(&h, engine, i915->kernel_context);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
@@ -599,6 +599,9 @@ static int igt_wait_reset(void *arg)
 	long timeout;
 	int err;
 
+	if (!intel_engine_can_store_dword(i915->engine[RCS]))
+		return 0;
+
 	/* Check that we detect a stuck waiter and issue a reset */
 
 	global_reset_lock(i915);
@@ -664,9 +667,6 @@ static int igt_reset_queue(void *arg)
 
 	/* Check that we replay pending requests following a hang */
 
-	if (!igt_can_mi_store_dword_imm(i915))
-		return 0;
-
 	global_reset_lock(i915);
 
 	mutex_lock(&i915->drm.struct_mutex);
@@ -679,6 +679,9 @@ static int igt_reset_queue(void *arg)
 		IGT_TIMEOUT(end_time);
 		unsigned int count;
 
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
 		prev = hang_create_request(&h, engine, i915->kernel_context);
 		if (IS_ERR(prev)) {
 			err = PTR_ERR(prev);
@@ -784,6 +787,9 @@ static int igt_handle_error(void *arg)
 	if (!intel_has_reset_engine(i915))
 		return 0;
 
+	if (!intel_engine_can_store_dword(i915->engine[RCS]))
+		return 0;
+
 	mutex_lock(&i915->drm.struct_mutex);
 
 	err = hang_init(&h, i915);
-- 
2.13.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 2/7] drm/i915: Check context status before looking up our obj/vma
  2017-08-16  8:52 [PATCH 1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs Chris Wilson
@ 2017-08-16  8:52 ` Chris Wilson
  2017-08-17 14:10   ` Mika Kuoppala
  2017-08-16  8:52 ` [PATCH 3/7] drm/i915: Convert execbuf to use struct-of-array packing for critical fields Chris Wilson
                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2017-08-16  8:52 UTC (permalink / raw)
  To: intel-gfx

Since we keep the context around across the slow lookup where we may
drop the struct_mutex, we should double check that the context is still
valid upon reacquisition.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 359d5dc6d8df..044fb1205554 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -679,13 +679,6 @@ static int eb_select_context(struct i915_execbuffer *eb)
 	if (unlikely(!ctx))
 		return -ENOENT;
 
-	if (unlikely(i915_gem_context_is_banned(ctx))) {
-		DRM_DEBUG("Context %u tried to submit while banned\n",
-			  ctx->user_handle);
-		i915_gem_context_put(ctx);
-		return -EIO;
-	}
-
 	eb->ctx = ctx;
 	eb->vm = ctx->ppgtt ? &ctx->ppgtt->base : &eb->i915->ggtt.base;
 
@@ -707,6 +700,12 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	int slow_pass = -1;
 	int err;
 
+	if (unlikely(i915_gem_context_is_closed(eb->ctx)))
+		return -ENOENT;
+
+	if (unlikely(i915_gem_context_is_banned(eb->ctx)))
+		return -EIO;
+
 	INIT_LIST_HEAD(&eb->relocs);
 	INIT_LIST_HEAD(&eb->unbound);
 
-- 
2.13.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 3/7] drm/i915: Convert execbuf to use struct-of-array packing for critical fields
  2017-08-16  8:52 [PATCH 1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs Chris Wilson
  2017-08-16  8:52 ` [PATCH 2/7] drm/i915: Check context status before looking up our obj/vma Chris Wilson
@ 2017-08-16  8:52 ` Chris Wilson
  2017-08-16  8:52 ` [PATCH 4/7] drm/i915: Simplify eb_lookup_vmas() Chris Wilson
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2017-08-16  8:52 UTC (permalink / raw)
  To: intel-gfx

When userspace is doing most of the work, avoiding relocs (using
NO_RELOC) and opting out of implicit synchronisation (using ASYNC), we
still spend a lot of time processing the arrays in execbuf, even though
we now should have nothing to do most of the time. One issue that
becomes readily apparent in profiling anv is that iterating over the
large execobj[] is unfriendly to the loop prefetchers of the CPU and it
much prefers iterating over a pair of arrays rather than one big array.

v2: Clear vma[] on construction to handle errors during vma lookup

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_evict.c      |   4 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 301 +++++++++++++++--------------
 drivers/gpu/drm/i915/i915_vma.h            |   2 +-
 3 files changed, 156 insertions(+), 151 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index a193f1b36c67..4df039ef2ce3 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -318,8 +318,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 		/* Overlap of objects in the same batch? */
 		if (i915_vma_is_pinned(vma)) {
 			ret = -ENOSPC;
-			if (vma->exec_entry &&
-			    vma->exec_entry->flags & EXEC_OBJECT_PINNED)
+			if (vma->exec_flags &&
+			    *vma->exec_flags & EXEC_OBJECT_PINNED)
 				ret = -EINVAL;
 			break;
 		}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 044fb1205554..da6cb2fe5f85 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -192,6 +192,8 @@ struct i915_execbuffer {
 	struct drm_file *file; /** per-file lookup tables and limits */
 	struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
 	struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
+	struct i915_vma **vma;
+	unsigned int *flags;
 
 	struct intel_engine_cs *engine; /** engine to queue the request to */
 	struct i915_gem_context *ctx; /** context for building the request */
@@ -245,13 +247,7 @@ struct i915_execbuffer {
 	struct hlist_head *buckets; /** ht for relocation handles */
 };
 
-/*
- * As an alternative to creating a hashtable of handle-to-vma for a batch,
- * we used the last available reserved field in the execobject[] and stash
- * a link from the execobj to its vma.
- */
-#define __exec_to_vma(ee) (ee)->rsvd2
-#define exec_to_vma(ee) u64_to_ptr(struct i915_vma, __exec_to_vma(ee))
+#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
 
 /*
  * Used to convert any address to canonical form.
@@ -320,85 +316,82 @@ static int eb_create(struct i915_execbuffer *eb)
 
 static bool
 eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
-		 const struct i915_vma *vma)
+		 const struct i915_vma *vma,
+		 unsigned int flags)
 {
-	if (!(entry->flags & __EXEC_OBJECT_HAS_PIN))
-		return true;
-
 	if (vma->node.size < entry->pad_to_size)
 		return true;
 
 	if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
 		return true;
 
-	if (entry->flags & EXEC_OBJECT_PINNED &&
+	if (flags & EXEC_OBJECT_PINNED &&
 	    vma->node.start != entry->offset)
 		return true;
 
-	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
+	if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
 	    vma->node.start < BATCH_OFFSET_BIAS)
 		return true;
 
-	if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
+	if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
 	    (vma->node.start + vma->node.size - 1) >> 32)
 		return true;
 
 	return false;
 }
 
-static inline void
+static inline bool
 eb_pin_vma(struct i915_execbuffer *eb,
-	   struct drm_i915_gem_exec_object2 *entry,
+	   const struct drm_i915_gem_exec_object2 *entry,
 	   struct i915_vma *vma)
 {
-	u64 flags;
+	unsigned int exec_flags = *vma->exec_flags;
+	u64 pin_flags;
 
 	if (vma->node.size)
-		flags = vma->node.start;
+		pin_flags = vma->node.start;
 	else
-		flags = entry->offset & PIN_OFFSET_MASK;
+		pin_flags = entry->offset & PIN_OFFSET_MASK;
 
-	flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
-	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_GTT))
-		flags |= PIN_GLOBAL;
+	pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
+	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_GTT))
+		pin_flags |= PIN_GLOBAL;
 
-	if (unlikely(i915_vma_pin(vma, 0, 0, flags)))
-		return;
+	if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags)))
+		return false;
 
-	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
+	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
 		if (unlikely(i915_vma_get_fence(vma))) {
 			i915_vma_unpin(vma);
-			return;
+			return false;
 		}
 
 		if (i915_vma_pin_fence(vma))
-			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
 
-	entry->flags |= __EXEC_OBJECT_HAS_PIN;
+	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
+	return !eb_vma_misplaced(entry, vma, exec_flags);
 }
 
-static inline void
-__eb_unreserve_vma(struct i915_vma *vma,
-		   const struct drm_i915_gem_exec_object2 *entry)
+static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
 {
-	GEM_BUG_ON(!(entry->flags & __EXEC_OBJECT_HAS_PIN));
+	GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
 
-	if (unlikely(entry->flags & __EXEC_OBJECT_HAS_FENCE))
+	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
 		i915_vma_unpin_fence(vma);
 
 	__i915_vma_unpin(vma);
 }
 
 static inline void
-eb_unreserve_vma(struct i915_vma *vma,
-		 struct drm_i915_gem_exec_object2 *entry)
+eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags)
 {
-	if (!(entry->flags & __EXEC_OBJECT_HAS_PIN))
+	if (!(*flags & __EXEC_OBJECT_HAS_PIN))
 		return;
 
-	__eb_unreserve_vma(vma, entry);
-	entry->flags &= ~__EXEC_OBJECT_RESERVED;
+	__eb_unreserve_vma(vma, *flags);
+	*flags &= ~__EXEC_OBJECT_RESERVED;
 }
 
 static int
@@ -428,7 +421,7 @@ eb_validate_vma(struct i915_execbuffer *eb,
 		entry->pad_to_size = 0;
 	}
 
-	if (unlikely(vma->exec_entry)) {
+	if (unlikely(vma->exec_flags)) {
 		DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
 			  entry->handle, (int)(entry - eb->exec));
 		return -EINVAL;
@@ -441,14 +434,25 @@ eb_validate_vma(struct i915_execbuffer *eb,
 	 */
 	entry->offset = gen8_noncanonical_addr(entry->offset);
 
+	if (!eb->reloc_cache.has_fence) {
+		entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
+	} else {
+		if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
+		     eb->reloc_cache.needs_unfenced) &&
+		    i915_gem_object_is_tiled(vma->obj))
+			entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
+	}
+
+	if (!(entry->flags & EXEC_OBJECT_PINNED))
+		entry->flags |= eb->context_flags;
+
 	return 0;
 }
 
 static int
-eb_add_vma(struct i915_execbuffer *eb,
-	   struct drm_i915_gem_exec_object2 *entry,
-	   struct i915_vma *vma)
+eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
 {
+	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 	int err;
 
 	GEM_BUG_ON(i915_vma_is_closed(vma));
@@ -469,40 +473,28 @@ eb_add_vma(struct i915_execbuffer *eb,
 	if (entry->relocation_count)
 		list_add_tail(&vma->reloc_link, &eb->relocs);
 
-	if (!eb->reloc_cache.has_fence) {
-		entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
-	} else {
-		if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
-		     eb->reloc_cache.needs_unfenced) &&
-		    i915_gem_object_is_tiled(vma->obj))
-			entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
-	}
-
-	if (!(entry->flags & EXEC_OBJECT_PINNED))
-		entry->flags |= eb->context_flags;
-
 	/*
 	 * Stash a pointer from the vma to execobj, so we can query its flags,
 	 * size, alignment etc as provided by the user. Also we stash a pointer
 	 * to the vma inside the execobj so that we can use a direct lookup
 	 * to find the right target VMA when doing relocations.
 	 */
-	vma->exec_entry = entry;
-	__exec_to_vma(entry) = (uintptr_t)vma;
+	eb->vma[i] = vma;
+	eb->flags[i] = entry->flags;
+	vma->exec_flags = &eb->flags[i];
 
 	err = 0;
-	eb_pin_vma(eb, entry, vma);
-	if (eb_vma_misplaced(entry, vma)) {
-		eb_unreserve_vma(vma, entry);
-
-		list_add_tail(&vma->exec_link, &eb->unbound);
-		if (drm_mm_node_allocated(&vma->node))
-			err = i915_vma_unbind(vma);
-	} else {
+	if (eb_pin_vma(eb, entry, vma)) {
 		if (entry->offset != vma->node.start) {
 			entry->offset = vma->node.start | UPDATE;
 			eb->args->flags |= __EXEC_HAS_RELOC;
 		}
+	} else {
+		eb_unreserve_vma(vma, vma->exec_flags);
+
+		list_add_tail(&vma->exec_link, &eb->unbound);
+		if (drm_mm_node_allocated(&vma->node))
+			err = i915_vma_unbind(vma);
 	}
 	return err;
 }
@@ -527,32 +519,35 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
 static int eb_reserve_vma(const struct i915_execbuffer *eb,
 			  struct i915_vma *vma)
 {
-	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	u64 flags;
+	struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
+	unsigned int exec_flags = *vma->exec_flags;
+	u64 pin_flags;
 	int err;
 
-	flags = PIN_USER | PIN_NONBLOCK;
-	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
-		flags |= PIN_GLOBAL;
+	pin_flags = PIN_USER | PIN_NONBLOCK;
+	if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
+		pin_flags |= PIN_GLOBAL;
 
 	/*
 	 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
 	 * limit address to the first 4GBs for unflagged objects.
 	 */
-	if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
-		flags |= PIN_ZONE_4G;
+	if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+		pin_flags |= PIN_ZONE_4G;
 
-	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
-		flags |= PIN_MAPPABLE;
+	if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
+		pin_flags |= PIN_MAPPABLE;
 
-	if (entry->flags & EXEC_OBJECT_PINNED) {
-		flags |= entry->offset | PIN_OFFSET_FIXED;
-		flags &= ~PIN_NONBLOCK; /* force overlapping PINNED checks */
-	} else if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) {
-		flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+	if (exec_flags & EXEC_OBJECT_PINNED) {
+		pin_flags |= entry->offset | PIN_OFFSET_FIXED;
+		pin_flags &= ~PIN_NONBLOCK; /* force overlapping checks */
+	} else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) {
+		pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
 	}
 
-	err = i915_vma_pin(vma, entry->pad_to_size, entry->alignment, flags);
+	err = i915_vma_pin(vma,
+			   entry->pad_to_size, entry->alignment,
+			   pin_flags);
 	if (err)
 		return err;
 
@@ -561,7 +556,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
 		eb->args->flags |= __EXEC_HAS_RELOC;
 	}
 
-	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
+	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
 		err = i915_vma_get_fence(vma);
 		if (unlikely(err)) {
 			i915_vma_unpin(vma);
@@ -569,11 +564,11 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
 		}
 
 		if (i915_vma_pin_fence(vma))
-			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
 
-	entry->flags |= __EXEC_OBJECT_HAS_PIN;
-	GEM_BUG_ON(eb_vma_misplaced(entry, vma));
+	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
+	GEM_BUG_ON(eb_vma_misplaced(entry, vma, exec_flags));
 
 	return 0;
 }
@@ -615,18 +610,18 @@ static int eb_reserve(struct i915_execbuffer *eb)
 		INIT_LIST_HEAD(&eb->unbound);
 		INIT_LIST_HEAD(&last);
 		for (i = 0; i < count; i++) {
-			struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+			unsigned int flags = eb->flags[i];
+			struct i915_vma *vma = eb->vma[i];
 
-			if (entry->flags & EXEC_OBJECT_PINNED &&
-			    entry->flags & __EXEC_OBJECT_HAS_PIN)
+			if (flags & EXEC_OBJECT_PINNED &&
+			    flags & __EXEC_OBJECT_HAS_PIN)
 				continue;
 
-			vma = exec_to_vma(entry);
-			eb_unreserve_vma(vma, entry);
+			eb_unreserve_vma(vma, &eb->flags[i]);
 
-			if (entry->flags & EXEC_OBJECT_PINNED)
+			if (flags & EXEC_OBJECT_PINNED)
 				list_add(&vma->exec_link, &eb->unbound);
-			else if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
+			else if (flags & __EXEC_OBJECT_NEEDS_MAP)
 				list_add_tail(&vma->exec_link, &eb->unbound);
 			else
 				list_add_tail(&vma->exec_link, &last);
@@ -714,18 +709,15 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
 
 	for (i = 0; i < count; i++) {
-		__exec_to_vma(&eb->exec[i]) = 0;
-
 		hlist_for_each_entry(vma,
 				     ht_head(lut, eb->exec[i].handle),
 				     ctx_node) {
 			if (vma->ctx_handle != eb->exec[i].handle)
 				continue;
 
-			err = eb_add_vma(eb, &eb->exec[i], vma);
+			err = eb_add_vma(eb, i, vma);
 			if (unlikely(err))
 				return err;
-
 			goto next_vma;
 		}
 
@@ -746,7 +738,7 @@ next_vma: ;
 	for (i = slow_pass; i < count; i++) {
 		struct drm_i915_gem_object *obj;
 
-		if (__exec_to_vma(&eb->exec[i]))
+		if (eb->vma[i])
 			continue;
 
 		obj = to_intel_bo(idr_find(idr, eb->exec[i].handle));
@@ -758,14 +750,17 @@ next_vma: ;
 			goto err;
 		}
 
-		__exec_to_vma(&eb->exec[i]) = INTERMEDIATE | (uintptr_t)obj;
+		eb->vma[i] = (struct i915_vma *)
+			ptr_pack_bits(obj, INTERMEDIATE, 1);
 	}
 	spin_unlock(&eb->file->table_lock);
 
 	for (i = slow_pass; i < count; i++) {
 		struct drm_i915_gem_object *obj;
+		unsigned int is_obj;
 
-		if (!(__exec_to_vma(&eb->exec[i]) & INTERMEDIATE))
+		obj = (typeof(obj))ptr_unpack_bits(eb->vma[i], &is_obj, 1);
+		if (!is_obj)
 			continue;
 
 		/*
@@ -776,8 +771,6 @@ next_vma: ;
 		 * from the (obj, vm) we don't run the risk of creating
 		 * duplicated vmas for the same vm.
 		 */
-		obj = u64_to_ptr(typeof(*obj),
-				 __exec_to_vma(&eb->exec[i]) & ~INTERMEDIATE);
 		vma = i915_vma_instance(obj, eb->vm, NULL);
 		if (unlikely(IS_ERR(vma))) {
 			DRM_DEBUG("Failed to lookup VMA\n");
@@ -801,14 +794,17 @@ next_vma: ;
 			i915_vma_get(vma);
 		}
 
-		err = eb_add_vma(eb, &eb->exec[i], vma);
+		err = eb_add_vma(eb, i, vma);
 		if (unlikely(err))
 			goto err;
 
+		GEM_BUG_ON(vma != eb->vma[i]);
+		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
+
 		/* Only after we validated the user didn't use our bits */
 		if (vma->ctx != eb->ctx) {
 			i915_vma_get(vma);
-			eb->exec[i].flags |= __EXEC_OBJECT_HAS_REF;
+			*vma->exec_flags |= __EXEC_OBJECT_HAS_REF;
 		}
 	}
 
@@ -822,7 +818,8 @@ next_vma: ;
 out:
 	/* take note of the batch buffer before we might reorder the lists */
 	i = eb_batch_index(eb);
-	eb->batch = exec_to_vma(&eb->exec[i]);
+	eb->batch = eb->vma[i];
+	GEM_BUG_ON(eb->batch->exec_flags != &eb->flags[i]);
 
 	/*
 	 * SNA is doing fancy tricks with compressing batch buffers, which leads
@@ -833,18 +830,18 @@ next_vma: ;
 	 * Note that actual hangs have only been observed on gen7, but for
 	 * paranoia do it everywhere.
 	 */
-	if (!(eb->exec[i].flags & EXEC_OBJECT_PINNED))
-		eb->exec[i].flags |= __EXEC_OBJECT_NEEDS_BIAS;
+	if (!(eb->flags[i] & EXEC_OBJECT_PINNED))
+		eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
 	if (eb->reloc_cache.has_fence)
-		eb->exec[i].flags |= EXEC_OBJECT_NEEDS_FENCE;
+		eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;
 
 	eb->args->flags |= __EXEC_VALIDATED;
 	return eb_reserve(eb);
 
 err:
 	for (i = slow_pass; i < count; i++) {
-		if (__exec_to_vma(&eb->exec[i]) & INTERMEDIATE)
-			__exec_to_vma(&eb->exec[i]) = 0;
+		if (ptr_unmask_bits(eb->vma[i], 1))
+			eb->vma[i] = NULL;
 	}
 	lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
 	return err;
@@ -857,7 +854,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
 	if (eb->lut_size < 0) {
 		if (handle >= -eb->lut_size)
 			return NULL;
-		return exec_to_vma(&eb->exec[handle]);
+		return eb->vma[handle];
 	} else {
 		struct hlist_head *head;
 		struct i915_vma *vma;
@@ -877,24 +874,21 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
 	unsigned int i;
 
 	for (i = 0; i < count; i++) {
-		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
-		struct i915_vma *vma = exec_to_vma(entry);
+		struct i915_vma *vma = eb->vma[i];
+		unsigned int flags = eb->flags[i];
 
 		if (!vma)
 			continue;
 
-		GEM_BUG_ON(vma->exec_entry != entry);
-		vma->exec_entry = NULL;
-		__exec_to_vma(entry) = 0;
+		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
+		vma->exec_flags = NULL;
+		eb->vma[i] = NULL;
 
-		if (entry->flags & __EXEC_OBJECT_HAS_PIN)
-			__eb_unreserve_vma(vma, entry);
+		if (flags & __EXEC_OBJECT_HAS_PIN)
+			__eb_unreserve_vma(vma, flags);
 
-		if (entry->flags & __EXEC_OBJECT_HAS_REF)
+		if (flags & __EXEC_OBJECT_HAS_REF)
 			i915_vma_put(vma);
-
-		entry->flags &=
-			~(__EXEC_OBJECT_RESERVED | __EXEC_OBJECT_HAS_REF);
 	}
 }
 
@@ -1383,7 +1377,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	}
 
 	if (reloc->write_domain) {
-		target->exec_entry->flags |= EXEC_OBJECT_WRITE;
+		*target->exec_flags |= EXEC_OBJECT_WRITE;
 
 		/*
 		 * Sandybridge PPGTT errata: We need a global gtt mapping
@@ -1435,7 +1429,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	 * do relocations we are already stalling, disable the user's opt
 	 * of our synchronisation.
 	 */
-	vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC;
+	*vma->exec_flags &= ~EXEC_OBJECT_ASYNC;
 
 	/* and update the user's relocation entry */
 	return relocate_entry(vma, reloc, eb, target);
@@ -1446,7 +1440,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
 	struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
 	struct drm_i915_gem_relocation_entry __user *urelocs;
-	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+	const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
 	unsigned int remain;
 
 	urelocs = u64_to_user_ptr(entry->relocs_ptr);
@@ -1529,7 +1523,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
 static int
 eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma)
 {
-	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+	const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
 	struct drm_i915_gem_relocation_entry *relocs =
 		u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
 	unsigned int i;
@@ -1733,6 +1727,8 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
 	if (err)
 		goto err;
 
+	GEM_BUG_ON(!eb->batch);
+
 	list_for_each_entry(vma, &eb->relocs, reloc_link) {
 		if (!have_copy) {
 			pagefault_disable();
@@ -1826,11 +1822,11 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 	int err;
 
 	for (i = 0; i < count; i++) {
-		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
-		struct i915_vma *vma = exec_to_vma(entry);
+		unsigned int flags = eb->flags[i];
+		struct i915_vma *vma = eb->vma[i];
 		struct drm_i915_gem_object *obj = vma->obj;
 
-		if (entry->flags & EXEC_OBJECT_CAPTURE) {
+		if (flags & EXEC_OBJECT_CAPTURE) {
 			struct i915_gem_capture_list *capture;
 
 			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
@@ -1838,7 +1834,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 				return -ENOMEM;
 
 			capture->next = eb->request->capture_list;
-			capture->vma = vma;
+			capture->vma = eb->vma[i];
 			eb->request->capture_list = capture;
 		}
 
@@ -1856,29 +1852,29 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 		 */
 		if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
 			if (i915_gem_clflush_object(obj, 0))
-				entry->flags &= ~EXEC_OBJECT_ASYNC;
+				flags &= ~EXEC_OBJECT_ASYNC;
 		}
 
-		if (entry->flags & EXEC_OBJECT_ASYNC)
-			goto skip_flushes;
+		if (flags & EXEC_OBJECT_ASYNC)
+			continue;
 
 		err = i915_gem_request_await_object
-			(eb->request, obj, entry->flags & EXEC_OBJECT_WRITE);
+			(eb->request, obj, flags & EXEC_OBJECT_WRITE);
 		if (err)
 			return err;
-
-skip_flushes:
-		i915_vma_move_to_active(vma, eb->request, entry->flags);
-		__eb_unreserve_vma(vma, entry);
-		vma->exec_entry = NULL;
 	}
 
 	for (i = 0; i < count; i++) {
-		const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
-		struct i915_vma *vma = exec_to_vma(entry);
+		unsigned int flags = eb->flags[i];
+		struct i915_vma *vma = eb->vma[i];
+
+		i915_vma_move_to_active(vma, eb->request, flags);
+		eb_export_fence(vma, eb->request, flags);
 
-		eb_export_fence(vma, eb->request, entry->flags);
-		if (unlikely(entry->flags & __EXEC_OBJECT_HAS_REF))
+		__eb_unreserve_vma(vma, flags);
+		vma->exec_flags = NULL;
+
+		if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
 			i915_vma_put(vma);
 	}
 	eb->exec = NULL;
@@ -2007,11 +2003,11 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
 	if (IS_ERR(vma))
 		goto out;
 
-	vma->exec_entry =
-		memset(&eb->exec[eb->buffer_count++],
-		       0, sizeof(*vma->exec_entry));
-	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
-	__exec_to_vma(vma->exec_entry) = (uintptr_t)i915_vma_get(vma);
+	eb->vma[eb->buffer_count] = i915_vma_get(vma);
+	eb->flags[eb->buffer_count] =
+		__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
+	vma->exec_flags = &eb->flags[eb->buffer_count];
+	eb->buffer_count++;
 
 out:
 	i915_gem_object_unpin_pages(shadow_batch_obj);
@@ -2270,7 +2266,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	eb.args = args;
 	if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
 		args->flags |= __EXEC_HAS_RELOC;
+
 	eb.exec = exec;
+	eb.vma = memset(exec + args->buffer_count + 1, 0,
+			(args->buffer_count + 1) * sizeof(*eb.vma));
+	eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1);
+
 	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
 	if (USES_FULL_PPGTT(eb.i915))
 		eb.invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
@@ -2358,7 +2359,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		goto err_vma;
 	}
 
-	if (unlikely(eb.batch->exec_entry->flags & EXEC_OBJECT_WRITE)) {
+	if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) {
 		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
 		err = -EINVAL;
 		goto err_vma;
@@ -2511,7 +2512,9 @@ int
 i915_gem_execbuffer(struct drm_device *dev, void *data,
 		    struct drm_file *file)
 {
-	const size_t sz = sizeof(struct drm_i915_gem_exec_object2);
+	const size_t sz = (sizeof(struct drm_i915_gem_exec_object2) +
+			   sizeof(struct i915_vma *) +
+			   sizeof(unsigned int));
 	struct drm_i915_gem_execbuffer *args = data;
 	struct drm_i915_gem_execbuffer2 exec2;
 	struct drm_i915_gem_exec_object *exec_list = NULL;
@@ -2602,7 +2605,9 @@ int
 i915_gem_execbuffer2(struct drm_device *dev, void *data,
 		     struct drm_file *file)
 {
-	const size_t sz = sizeof(struct drm_i915_gem_exec_object2);
+	const size_t sz = (sizeof(struct drm_i915_gem_exec_object2) +
+			   sizeof(struct i915_vma *) +
+			   sizeof(unsigned int));
 	struct drm_i915_gem_execbuffer2 *args = data;
 	struct drm_i915_gem_exec_object2 *exec2_list;
 	struct drm_syncobj **fences = NULL;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 20cf272c97b1..5c49506d14bc 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -112,7 +112,7 @@ struct i915_vma {
 	/**
 	 * Used for performing relocations during execbuffer insertion.
 	 */
-	struct drm_i915_gem_exec_object2 *exec_entry;
+	unsigned int *exec_flags;
 	struct hlist_node exec_node;
 	u32 exec_handle;
 
-- 
2.13.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 4/7] drm/i915: Simplify eb_lookup_vmas()
  2017-08-16  8:52 [PATCH 1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs Chris Wilson
  2017-08-16  8:52 ` [PATCH 2/7] drm/i915: Check context status before looking up our obj/vma Chris Wilson
  2017-08-16  8:52 ` [PATCH 3/7] drm/i915: Convert execbuf to use struct-of-array packing for critical fields Chris Wilson
@ 2017-08-16  8:52 ` Chris Wilson
  2017-08-16  8:52 ` [PATCH 5/7] drm/i915: Replace execbuf vma ht with an idr Chris Wilson
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2017-08-16  8:52 UTC (permalink / raw)
  To: intel-gfx

Since the introduction of being able to perform a lockless lookup of an
object (i915_gem_object_get_rcu() in fbbd37b36fa5 ("drm/i915: Move object
release to a freelist + worker") we no longer need to split the
object/vma lookup into 3 phases and so combine them into a much simpler
single loop.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 122 +++++++++--------------------
 1 file changed, 36 insertions(+), 86 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index da6cb2fe5f85..95e461259d24 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -450,7 +450,9 @@ eb_validate_vma(struct i915_execbuffer *eb,
 }
 
 static int
-eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
+eb_add_vma(struct i915_execbuffer *eb,
+	   unsigned int i, struct i915_vma *vma,
+	   unsigned int flags)
 {
 	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 	int err;
@@ -480,7 +482,7 @@ eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
 	 * to find the right target VMA when doing relocations.
 	 */
 	eb->vma[i] = vma;
-	eb->flags[i] = entry->flags;
+	eb->flags[i] = entry->flags | flags;
 	vma->exec_flags = &eb->flags[i];
 
 	err = 0;
@@ -686,13 +688,9 @@ static int eb_select_context(struct i915_execbuffer *eb)
 
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
-#define INTERMEDIATE BIT(0)
-	const unsigned int count = eb->buffer_count;
 	struct i915_gem_context_vma_lut *lut = &eb->ctx->vma_lut;
-	struct i915_vma *vma;
-	struct idr *idr;
+	struct drm_i915_gem_object *uninitialized_var(obj);
 	unsigned int i;
-	int slow_pass = -1;
 	int err;
 
 	if (unlikely(i915_gem_context_is_closed(eb->ctx)))
@@ -708,82 +706,38 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 		flush_work(&lut->resize);
 	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
 
-	for (i = 0; i < count; i++) {
-		hlist_for_each_entry(vma,
-				     ht_head(lut, eb->exec[i].handle),
-				     ctx_node) {
-			if (vma->ctx_handle != eb->exec[i].handle)
-				continue;
-
-			err = eb_add_vma(eb, i, vma);
-			if (unlikely(err))
-				return err;
-			goto next_vma;
-		}
-
-		if (slow_pass < 0)
-			slow_pass = i;
-next_vma: ;
-	}
+	for (i = 0; i < eb->buffer_count; i++) {
+		u32 handle = eb->exec[i].handle;
+		struct hlist_head *hl = ht_head(lut, handle);
+		unsigned int flags = 0;
+		struct i915_vma *vma;
 
-	if (slow_pass < 0)
-		goto out;
+		hlist_for_each_entry(vma, hl, ctx_node) {
+			GEM_BUG_ON(vma->ctx != eb->ctx);
 
-	spin_lock(&eb->file->table_lock);
-	/*
-	 * Grab a reference to the object and release the lock so we can lookup
-	 * or create the VMA without using GFP_ATOMIC
-	 */
-	idr = &eb->file->object_idr;
-	for (i = slow_pass; i < count; i++) {
-		struct drm_i915_gem_object *obj;
+			if (vma->ctx_handle != handle)
+				continue;
 
-		if (eb->vma[i])
-			continue;
+			goto add_vma;
+		}
 
-		obj = to_intel_bo(idr_find(idr, eb->exec[i].handle));
+		obj = i915_gem_object_lookup(eb->file, handle);
 		if (unlikely(!obj)) {
-			spin_unlock(&eb->file->table_lock);
-			DRM_DEBUG("Invalid object handle %d at index %d\n",
-				  eb->exec[i].handle, i);
 			err = -ENOENT;
-			goto err;
+			goto err_vma;
 		}
 
-		eb->vma[i] = (struct i915_vma *)
-			ptr_pack_bits(obj, INTERMEDIATE, 1);
-	}
-	spin_unlock(&eb->file->table_lock);
-
-	for (i = slow_pass; i < count; i++) {
-		struct drm_i915_gem_object *obj;
-		unsigned int is_obj;
-
-		obj = (typeof(obj))ptr_unpack_bits(eb->vma[i], &is_obj, 1);
-		if (!is_obj)
-			continue;
-
-		/*
-		 * NOTE: We can leak any vmas created here when something fails
-		 * later on. But that's no issue since vma_unbind can deal with
-		 * vmas which are not actually bound. And since only
-		 * lookup_or_create exists as an interface to get at the vma
-		 * from the (obj, vm) we don't run the risk of creating
-		 * duplicated vmas for the same vm.
-		 */
 		vma = i915_vma_instance(obj, eb->vm, NULL);
 		if (unlikely(IS_ERR(vma))) {
-			DRM_DEBUG("Failed to lookup VMA\n");
 			err = PTR_ERR(vma);
-			goto err;
+			goto err_obj;
 		}
 
 		/* First come, first served */
 		if (!vma->ctx) {
 			vma->ctx = eb->ctx;
-			vma->ctx_handle = eb->exec[i].handle;
-			hlist_add_head(&vma->ctx_node,
-				       ht_head(lut, eb->exec[i].handle));
+			vma->ctx_handle = handle;
+			hlist_add_head(&vma->ctx_node, hl);
 			lut->ht_count++;
 			lut->ht_size |= I915_CTX_RESIZE_IN_PROGRESS;
 			if (i915_vma_is_ggtt(vma)) {
@@ -791,21 +745,19 @@ next_vma: ;
 				obj->vma_hashed = vma;
 			}
 
-			i915_vma_get(vma);
+			/* transfer ref to ctx */
+			obj = NULL;
+		} else {
+			flags = __EXEC_OBJECT_HAS_REF;
 		}
 
-		err = eb_add_vma(eb, i, vma);
+add_vma:
+		err = eb_add_vma(eb, i, vma, flags);
 		if (unlikely(err))
-			goto err;
+			goto err_obj;
 
 		GEM_BUG_ON(vma != eb->vma[i]);
 		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
-
-		/* Only after we validated the user didn't use our bits */
-		if (vma->ctx != eb->ctx) {
-			i915_vma_get(vma);
-			*vma->exec_flags |= __EXEC_OBJECT_HAS_REF;
-		}
 	}
 
 	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
@@ -815,7 +767,6 @@ next_vma: ;
 			lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
 	}
 
-out:
 	/* take note of the batch buffer before we might reorder the lists */
 	i = eb_batch_index(eb);
 	eb->batch = eb->vma[i];
@@ -838,14 +789,13 @@ next_vma: ;
 	eb->args->flags |= __EXEC_VALIDATED;
 	return eb_reserve(eb);
 
-err:
-	for (i = slow_pass; i < count; i++) {
-		if (ptr_unmask_bits(eb->vma[i], 1))
-			eb->vma[i] = NULL;
-	}
+err_obj:
+	if (obj)
+		i915_gem_object_put(obj);
+err_vma:
+	eb->vma[i] = NULL;
 	lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
 	return err;
-#undef INTERMEDIATE
 }
 
 static struct i915_vma *
@@ -878,7 +828,7 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
 		unsigned int flags = eb->flags[i];
 
 		if (!vma)
-			continue;
+			break;
 
 		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
 		vma->exec_flags = NULL;
@@ -2268,8 +2218,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		args->flags |= __EXEC_HAS_RELOC;
 
 	eb.exec = exec;
-	eb.vma = memset(exec + args->buffer_count + 1, 0,
-			(args->buffer_count + 1) * sizeof(*eb.vma));
+	eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1);
+	eb.vma[0] = NULL;
 	eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1);
 
 	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
-- 
2.13.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 5/7] drm/i915: Replace execbuf vma ht with an idr
  2017-08-16  8:52 [PATCH 1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs Chris Wilson
                   ` (2 preceding siblings ...)
  2017-08-16  8:52 ` [PATCH 4/7] drm/i915: Simplify eb_lookup_vmas() Chris Wilson
@ 2017-08-16  8:52 ` Chris Wilson
  2017-08-16  8:52 ` [PATCH 6/7] drm/i915: Trivial grammar fix s/opt of/opt out of/ in comment Chris Wilson
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2017-08-16  8:52 UTC (permalink / raw)
  To: intel-gfx

This was the competing idea long ago, but it was only with the rewrite
of the idr as an radixtree and using the radixtree directly ourselves,
along with the realisation that we can store the vma directly in the
radixtree and only need a list for the reverse mapping, that made the
patch performant enough to displace using a hashtable. Though the vma ht
is fast and doesn't require and extra allocation (as we can embed the node
inside the vma), it does require a thread for resizing and serialization
and will have the occasional slow lookup. That is hairy enough to
investigate alternatives and favour them if equivalent in peak performance.
One advantage of allocating an indirection entry is that we can support a
single shared bo between many clients, something that was done on a
first-come first-serve basis for shared GGTT vma previously. To offset
the extra allocations, we create yet another kmem_cache for them.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c           |  6 --
 drivers/gpu/drm/i915/i915_drv.h               |  1 +
 drivers/gpu/drm/i915/i915_gem.c               | 46 +++++++++-----
 drivers/gpu/drm/i915/i915_gem_context.c       | 87 ++++++---------------------
 drivers/gpu/drm/i915/i915_gem_context.h       | 39 ++++--------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    | 79 ++++++++----------------
 drivers/gpu/drm/i915/i915_gem_object.h        | 23 ++++++-
 drivers/gpu/drm/i915/i915_vma.c               | 22 -------
 drivers/gpu/drm/i915/i915_vma.h               |  4 --
 drivers/gpu/drm/i915/selftests/mock_context.c | 15 ++---
 lib/radix-tree.c                              |  1 +
 11 files changed, 115 insertions(+), 208 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 329fb3649dc3..48572b157222 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1954,12 +1954,6 @@ static int i915_context_status(struct seq_file *m, void *unused)
 			seq_putc(m, '\n');
 		}
 
-		seq_printf(m,
-			   "\tvma hashtable size=%u (actual %lu), count=%u\n",
-			   ctx->vma_lut.ht_size,
-			   BIT(ctx->vma_lut.ht_bits),
-			   ctx->vma_lut.ht_count);
-
 		seq_putc(m, '\n');
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 620e53bd705a..2657e81978a0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2155,6 +2155,7 @@ struct drm_i915_private {
 
 	struct kmem_cache *objects;
 	struct kmem_cache *vmas;
+	struct kmem_cache *luts;
 	struct kmem_cache *requests;
 	struct kmem_cache *dependencies;
 	struct kmem_cache *priorities;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5a3f3bb3f21d..b9e8e0d6e97b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3242,25 +3242,33 @@ i915_gem_idle_work_handler(struct work_struct *work)
 
 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 {
+	struct drm_i915_private *i915 = to_i915(gem->dev);
 	struct drm_i915_gem_object *obj = to_intel_bo(gem);
 	struct drm_i915_file_private *fpriv = file->driver_priv;
-	struct i915_vma *vma, *vn;
+	struct i915_lut_handle *lut, *ln;
 
-	mutex_lock(&obj->base.dev->struct_mutex);
-	list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
-		if (vma->vm->file == fpriv)
+	mutex_lock(&i915->drm.struct_mutex);
+
+	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
+		struct i915_gem_context *ctx = lut->ctx;
+		struct i915_vma *vma;
+
+		if (ctx->file_priv != fpriv)
+			continue;
+
+		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
+
+		if (!i915_vma_is_ggtt(vma))
 			i915_vma_close(vma);
 
-	vma = obj->vma_hashed;
-	if (vma && vma->ctx->file_priv == fpriv)
-		i915_vma_unlink_ctx(vma);
+		list_del(&lut->obj_link);
+		list_del(&lut->ctx_link);
 
-	if (i915_gem_object_is_active(obj) &&
-	    !i915_gem_object_has_active_reference(obj)) {
-		i915_gem_object_set_active_reference(obj);
-		i915_gem_object_get(obj);
+		kmem_cache_free(i915->luts, lut);
+		__i915_gem_object_release_unless_active(obj);
 	}
-	mutex_unlock(&obj->base.dev->struct_mutex);
+
+	mutex_unlock(&i915->drm.struct_mutex);
 }
 
 static unsigned long to_wait_timeout(s64 timeout_ns)
@@ -4252,6 +4260,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	INIT_LIST_HEAD(&obj->global_link);
 	INIT_LIST_HEAD(&obj->userfault_link);
 	INIT_LIST_HEAD(&obj->vma_list);
+	INIT_LIST_HEAD(&obj->lut_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);
 
 	obj->ops = ops;
@@ -4495,8 +4504,8 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
 {
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
 
-	GEM_BUG_ON(i915_gem_object_has_active_reference(obj));
-	if (i915_gem_object_is_active(obj))
+	if (!i915_gem_object_has_active_reference(obj) &&
+	    i915_gem_object_is_active(obj))
 		i915_gem_object_set_active_reference(obj);
 	else
 		i915_gem_object_put(obj);
@@ -4888,12 +4897,16 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 	if (!dev_priv->vmas)
 		goto err_objects;
 
+	dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0);
+	if (!dev_priv->luts)
+		goto err_vmas;
+
 	dev_priv->requests = KMEM_CACHE(drm_i915_gem_request,
 					SLAB_HWCACHE_ALIGN |
 					SLAB_RECLAIM_ACCOUNT |
 					SLAB_TYPESAFE_BY_RCU);
 	if (!dev_priv->requests)
-		goto err_vmas;
+		goto err_luts;
 
 	dev_priv->dependencies = KMEM_CACHE(i915_dependency,
 					    SLAB_HWCACHE_ALIGN |
@@ -4937,6 +4950,8 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 	kmem_cache_destroy(dev_priv->dependencies);
 err_requests:
 	kmem_cache_destroy(dev_priv->requests);
+err_luts:
+	kmem_cache_destroy(dev_priv->luts);
 err_vmas:
 	kmem_cache_destroy(dev_priv->vmas);
 err_objects:
@@ -4959,6 +4974,7 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
 	kmem_cache_destroy(dev_priv->priorities);
 	kmem_cache_destroy(dev_priv->dependencies);
 	kmem_cache_destroy(dev_priv->requests);
+	kmem_cache_destroy(dev_priv->luts);
 	kmem_cache_destroy(dev_priv->vmas);
 	kmem_cache_destroy(dev_priv->objects);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 86ac74a8a5b2..58a2a44f88bd 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -93,69 +93,28 @@
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
-/* Initial size (as log2) to preallocate the handle->object hashtable */
-#define VMA_HT_BITS 2u /* 4 x 2 pointers, 64 bytes minimum */
-
-static void resize_vma_ht(struct work_struct *work)
+static void lut_close(struct i915_gem_context *ctx)
 {
-	struct i915_gem_context_vma_lut *lut =
-		container_of(work, typeof(*lut), resize);
-	unsigned int bits, new_bits, size, i;
-	struct hlist_head *new_ht;
-
-	GEM_BUG_ON(!(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS));
-
-	bits = 1 + ilog2(4*lut->ht_count/3 + 1);
-	new_bits = min_t(unsigned int,
-			 max(bits, VMA_HT_BITS),
-			 sizeof(unsigned int) * BITS_PER_BYTE - 1);
-	if (new_bits == lut->ht_bits)
-		goto out;
-
-	new_ht = kzalloc(sizeof(*new_ht)<<new_bits, GFP_KERNEL | __GFP_NOWARN);
-	if (!new_ht)
-		new_ht = vzalloc(sizeof(*new_ht)<<new_bits);
-	if (!new_ht)
-		/* Pretend resize succeeded and stop calling us for a bit! */
-		goto out;
-
-	size = BIT(lut->ht_bits);
-	for (i = 0; i < size; i++) {
-		struct i915_vma *vma;
-		struct hlist_node *tmp;
+	struct i915_lut_handle *lut, *ln;
+	struct radix_tree_iter iter;
+	void __rcu **slot;
 
-		hlist_for_each_entry_safe(vma, tmp, &lut->ht[i], ctx_node)
-			hlist_add_head(&vma->ctx_node,
-				       &new_ht[hash_32(vma->ctx_handle,
-						       new_bits)]);
+	list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) {
+		list_del(&lut->obj_link);
+		kmem_cache_free(ctx->i915->luts, lut);
 	}
-	kvfree(lut->ht);
-	lut->ht = new_ht;
-	lut->ht_bits = new_bits;
-out:
-	smp_store_release(&lut->ht_size, BIT(bits));
-	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
-}
 
-static void vma_lut_free(struct i915_gem_context *ctx)
-{
-	struct i915_gem_context_vma_lut *lut = &ctx->vma_lut;
-	unsigned int i, size;
+	radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
+		struct i915_vma *vma = rcu_dereference_raw(*slot);
+		struct drm_i915_gem_object *obj = vma->obj;
 
-	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS)
-		cancel_work_sync(&lut->resize);
+		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
 
-	size = BIT(lut->ht_bits);
-	for (i = 0; i < size; i++) {
-		struct i915_vma *vma;
+		if (!i915_vma_is_ggtt(vma))
+			i915_vma_close(vma);
 
-		hlist_for_each_entry(vma, &lut->ht[i], ctx_node) {
-			vma->obj->vma_hashed = NULL;
-			vma->ctx = NULL;
-			i915_vma_put(vma);
-		}
+		__i915_gem_object_release_unless_active(obj);
 	}
-	kvfree(lut->ht);
 }
 
 static void i915_gem_context_free(struct i915_gem_context *ctx)
@@ -165,7 +124,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 
-	vma_lut_free(ctx);
 	i915_ppgtt_put(ctx->ppgtt);
 
 	for (i = 0; i < I915_NUM_ENGINES; i++) {
@@ -239,8 +197,11 @@ void i915_gem_context_release(struct kref *ref)
 static void context_close(struct i915_gem_context *ctx)
 {
 	i915_gem_context_set_closed(ctx);
+
+	lut_close(ctx);
 	if (ctx->ppgtt)
 		i915_ppgtt_close(&ctx->ppgtt->base);
+
 	ctx->file_priv = ERR_PTR(-EBADF);
 	i915_gem_context_put(ctx);
 }
@@ -313,16 +274,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	ctx->i915 = dev_priv;
 	ctx->priority = I915_PRIORITY_NORMAL;
 
-	ctx->vma_lut.ht_bits = VMA_HT_BITS;
-	ctx->vma_lut.ht_size = BIT(VMA_HT_BITS);
-	BUILD_BUG_ON(BIT(VMA_HT_BITS) == I915_CTX_RESIZE_IN_PROGRESS);
-	ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size,
-				  sizeof(*ctx->vma_lut.ht),
-				  GFP_KERNEL);
-	if (!ctx->vma_lut.ht)
-		goto err_out;
-
-	INIT_WORK(&ctx->vma_lut.resize, resize_vma_ht);
+	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
+	INIT_LIST_HEAD(&ctx->handles_list);
 
 	/* Default context will never have a file_priv */
 	ret = DEFAULT_CONTEXT_HANDLE;
@@ -372,8 +325,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	put_pid(ctx->pid);
 	idr_remove(&file_priv->context_idr, ctx->user_handle);
 err_lut:
-	kvfree(ctx->vma_lut.ht);
-err_out:
 	context_close(ctx);
 	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 2d02918a449e..44688e22a5c2 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -27,6 +27,7 @@
 
 #include <linux/bitops.h>
 #include <linux/list.h>
+#include <linux/radix-tree.h>
 
 struct pid;
 
@@ -149,32 +150,6 @@ struct i915_gem_context {
 	/** ggtt_offset_bias: placement restriction for context objects */
 	u32 ggtt_offset_bias;
 
-	struct i915_gem_context_vma_lut {
-		/** ht_size: last request size to allocate the hashtable for. */
-		unsigned int ht_size;
-#define I915_CTX_RESIZE_IN_PROGRESS BIT(0)
-		/** ht_bits: real log2(size) of hashtable. */
-		unsigned int ht_bits;
-		/** ht_count: current number of entries inside the hashtable */
-		unsigned int ht_count;
-
-		/** ht: the array of buckets comprising the simple hashtable */
-		struct hlist_head *ht;
-
-		/**
-		 * resize: After an execbuf completes, we check the load factor
-		 * of the hashtable. If the hashtable is too full, or too empty,
-		 * we schedule a task to resize the hashtable. During the
-		 * resize, the entries are moved between different buckets and
-		 * so we cannot simultaneously read the hashtable as it is
-		 * being resized (unlike rhashtable). Therefore we treat the
-		 * active work as a strong barrier, pausing a subsequent
-		 * execbuf to wait for the resize worker to complete, if
-		 * required.
-		 */
-		struct work_struct resize;
-	} vma_lut;
-
 	/** engine: per-engine logical HW state */
 	struct intel_context {
 		struct i915_vma *state;
@@ -205,6 +180,18 @@ struct i915_gem_context {
 
 	/** remap_slice: Bitmask of cache lines that need remapping */
 	u8 remap_slice;
+
+	/** handles_vma: rbtree to look up our context specific obj/vma for
+	 * the user handle. (user handles are per fd, but the binding is
+	 * per vm, which may be one per context or shared with the global GTT)
+	 */
+	struct radix_tree_root handles_vma;
+
+	/** handles_list: reverse list of all the rbtree entries in use for
+	 * this context, which allows us to free all the allocations on
+	 * context close.
+	 */
+	struct list_head handles_list;
 };
 
 static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 95e461259d24..9f1057c4cf1c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -450,9 +450,7 @@ eb_validate_vma(struct i915_execbuffer *eb,
 }
 
 static int
-eb_add_vma(struct i915_execbuffer *eb,
-	   unsigned int i, struct i915_vma *vma,
-	   unsigned int flags)
+eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
 {
 	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 	int err;
@@ -482,7 +480,7 @@ eb_add_vma(struct i915_execbuffer *eb,
 	 * to find the right target VMA when doing relocations.
 	 */
 	eb->vma[i] = vma;
-	eb->flags[i] = entry->flags | flags;
+	eb->flags[i] = entry->flags;
 	vma->exec_flags = &eb->flags[i];
 
 	err = 0;
@@ -647,19 +645,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
 	} while (1);
 }
 
-static inline struct hlist_head *
-ht_head(const  struct i915_gem_context_vma_lut *lut, u32 handle)
-{
-	return &lut->ht[hash_32(handle, lut->ht_bits)];
-}
-
-static inline bool
-ht_needs_resize(const struct i915_gem_context_vma_lut *lut)
-{
-	return (4*lut->ht_count > 3*lut->ht_size ||
-		4*lut->ht_count + 1 < lut->ht_size);
-}
-
 static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
 {
 	if (eb->args->flags & I915_EXEC_BATCH_FIRST)
@@ -688,7 +673,7 @@ static int eb_select_context(struct i915_execbuffer *eb)
 
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
-	struct i915_gem_context_vma_lut *lut = &eb->ctx->vma_lut;
+	struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
 	struct drm_i915_gem_object *uninitialized_var(obj);
 	unsigned int i;
 	int err;
@@ -702,24 +687,14 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	INIT_LIST_HEAD(&eb->relocs);
 	INIT_LIST_HEAD(&eb->unbound);
 
-	if (unlikely(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS))
-		flush_work(&lut->resize);
-	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
-
 	for (i = 0; i < eb->buffer_count; i++) {
 		u32 handle = eb->exec[i].handle;
-		struct hlist_head *hl = ht_head(lut, handle);
-		unsigned int flags = 0;
+		struct i915_lut_handle *lut;
 		struct i915_vma *vma;
 
-		hlist_for_each_entry(vma, hl, ctx_node) {
-			GEM_BUG_ON(vma->ctx != eb->ctx);
-
-			if (vma->ctx_handle != handle)
-				continue;
-
+		vma = radix_tree_lookup(handles_vma, handle);
+		if (likely(vma))
 			goto add_vma;
-		}
 
 		obj = i915_gem_object_lookup(eb->file, handle);
 		if (unlikely(!obj)) {
@@ -733,26 +708,28 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 			goto err_obj;
 		}
 
-		/* First come, first served */
-		if (!vma->ctx) {
-			vma->ctx = eb->ctx;
-			vma->ctx_handle = handle;
-			hlist_add_head(&vma->ctx_node, hl);
-			lut->ht_count++;
-			lut->ht_size |= I915_CTX_RESIZE_IN_PROGRESS;
-			if (i915_vma_is_ggtt(vma)) {
-				GEM_BUG_ON(obj->vma_hashed);
-				obj->vma_hashed = vma;
-			}
+		lut = kmem_cache_alloc(eb->i915->luts, GFP_KERNEL);
+		if (unlikely(!lut)) {
+			err = -ENOMEM;
+			goto err_obj;
+		}
 
-			/* transfer ref to ctx */
-			obj = NULL;
-		} else {
-			flags = __EXEC_OBJECT_HAS_REF;
+		err = radix_tree_insert(handles_vma, handle, vma);
+		if (unlikely(err)) {
+			kfree(lut);
+			goto err_obj;
 		}
 
+		list_add(&lut->obj_link, &obj->lut_list);
+		list_add(&lut->ctx_link, &eb->ctx->handles_list);
+		lut->ctx = eb->ctx;
+		lut->handle = handle;
+
+		/* transfer ref to ctx */
+		obj = NULL;
+
 add_vma:
-		err = eb_add_vma(eb, i, vma, flags);
+		err = eb_add_vma(eb, i, vma);
 		if (unlikely(err))
 			goto err_obj;
 
@@ -760,13 +737,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
 	}
 
-	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
-		if (ht_needs_resize(lut))
-			queue_work(system_highpri_wq, &lut->resize);
-		else
-			lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
-	}
-
 	/* take note of the batch buffer before we might reorder the lists */
 	i = eb_batch_index(eb);
 	eb->batch = eb->vma[i];
@@ -794,7 +764,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 		i915_gem_object_put(obj);
 err_vma:
 	eb->vma[i] = NULL;
-	lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 3baa341432db..c30d8f808185 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -38,6 +38,19 @@
 
 struct drm_i915_gem_object;
 
+/*
+ * struct i915_lut_handle tracks the fast lookups from handle to vma used
+ * for execbuf. Although we use a radixtree for that mapping, in order to
+ * remove them as the object or context is closed, we need a secondary list
+ * and a translation entry (i915_lut_handle).
+ */
+struct i915_lut_handle {
+	struct list_head obj_link;
+	struct list_head ctx_link;
+	struct i915_gem_context *ctx;
+	u32 handle;
+};
+
 struct drm_i915_gem_object_ops {
 	unsigned int flags;
 #define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
@@ -89,7 +102,15 @@ struct drm_i915_gem_object {
 	 * They are also added to @vma_list for easy iteration.
 	 */
 	struct rb_root vma_tree;
-	struct i915_vma *vma_hashed;
+
+	/**
+	 * @lut_list: List of vma lookup entries in use for this object.
+	 *
+	 * If this object is closed, we need to remove all of its VMA from
+	 * the fast lookup index in associated contexts; @lut_list provides
+	 * this translation from object to context->handles_vma.
+	 */
+	struct list_head lut_list;
 
 	/** Stolen memory for this object, instead of being backed by shmem. */
 	struct drm_mm_node *stolen;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 958be0a95960..02d1a5eacb00 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -597,33 +597,11 @@ static void i915_vma_destroy(struct i915_vma *vma)
 	kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
 }
 
-void i915_vma_unlink_ctx(struct i915_vma *vma)
-{
-	struct i915_gem_context *ctx = vma->ctx;
-
-	if (ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
-		cancel_work_sync(&ctx->vma_lut.resize);
-		ctx->vma_lut.ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
-	}
-
-	__hlist_del(&vma->ctx_node);
-	ctx->vma_lut.ht_count--;
-
-	if (i915_vma_is_ggtt(vma))
-		vma->obj->vma_hashed = NULL;
-	vma->ctx = NULL;
-
-	i915_vma_put(vma);
-}
-
 void i915_vma_close(struct i915_vma *vma)
 {
 	GEM_BUG_ON(i915_vma_is_closed(vma));
 	vma->flags |= I915_VMA_CLOSED;
 
-	if (vma->ctx)
-		i915_vma_unlink_ctx(vma);
-
 	list_del(&vma->obj_link);
 	rb_erase(&vma->obj_node, &vma->obj->vma_tree);
 
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 5c49506d14bc..1fd61e88cfd0 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -115,10 +115,6 @@ struct i915_vma {
 	unsigned int *exec_flags;
 	struct hlist_node exec_node;
 	u32 exec_handle;
-
-	struct i915_gem_context *ctx;
-	struct hlist_node ctx_node;
-	u32 ctx_handle;
 };
 
 struct i915_vma *
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index d436f2d5089b..098ce643ad07 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -40,18 +40,13 @@ mock_context(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&ctx->link);
 	ctx->i915 = i915;
 
-	ctx->vma_lut.ht_bits = VMA_HT_BITS;
-	ctx->vma_lut.ht_size = BIT(VMA_HT_BITS);
-	ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size,
-				  sizeof(*ctx->vma_lut.ht),
-				  GFP_KERNEL);
-	if (!ctx->vma_lut.ht)
-		goto err_free;
+	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
+	INIT_LIST_HEAD(&ctx->handles_list);
 
 	ret = ida_simple_get(&i915->contexts.hw_ida,
 			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
 	if (ret < 0)
-		goto err_vma_ht;
+		goto err_handles;
 	ctx->hw_id = ret;
 
 	if (name) {
@@ -66,9 +61,7 @@ mock_context(struct drm_i915_private *i915,
 
 	return ctx;
 
-err_vma_ht:
-	kvfree(ctx->vma_lut.ht);
-err_free:
+err_handles:
 	kfree(ctx);
 	return NULL;
 
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 898e87998417..3527eb364964 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -2022,6 +2022,7 @@ void radix_tree_iter_delete(struct radix_tree_root *root,
 	if (__radix_tree_delete(root, iter->node, slot))
 		iter->index = iter->next_index;
 }
+EXPORT_SYMBOL(radix_tree_iter_delete);
 
 /**
  * radix_tree_delete_item - delete an item from a radix tree
-- 
2.13.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 6/7] drm/i915: Trivial grammar fix s/opt of/opt out of/ in comment
  2017-08-16  8:52 [PATCH 1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs Chris Wilson
                   ` (3 preceding siblings ...)
  2017-08-16  8:52 ` [PATCH 5/7] drm/i915: Replace execbuf vma ht with an idr Chris Wilson
@ 2017-08-16  8:52 ` Chris Wilson
  2017-08-17 13:38   ` Joonas Lahtinen
  2017-08-16  8:52 ` [PATCH 7/7] drm/i915: Mark the GT as busy before idling the previous request Chris Wilson
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2017-08-16  8:52 UTC (permalink / raw)
  To: intel-gfx

The word out was dropped from the sentence across the line break, put it
back.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 9f1057c4cf1c..3d74f3a27c13 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1346,7 +1346,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	 * patching using the GPU (though that should be serialised by the
 	 * timeline). To be completely sure, and since we are required to
 	 * do relocations we are already stalling, disable the user's opt
-	 * of our synchronisation.
+	 * out of our synchronisation.
 	 */
 	*vma->exec_flags &= ~EXEC_OBJECT_ASYNC;
 
-- 
2.13.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 7/7] drm/i915: Mark the GT as busy before idling the previous request
  2017-08-16  8:52 [PATCH 1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs Chris Wilson
                   ` (4 preceding siblings ...)
  2017-08-16  8:52 ` [PATCH 6/7] drm/i915: Trivial grammar fix s/opt of/opt out of/ in comment Chris Wilson
@ 2017-08-16  8:52 ` Chris Wilson
  2017-08-17 13:38   ` Joonas Lahtinen
  2017-08-16  9:10 ` ✓ Fi.CI.BAT: success for series starting with [1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs Patchwork
                   ` (2 subsequent siblings)
  8 siblings, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2017-08-16  8:52 UTC (permalink / raw)
  To: intel-gfx

In a synchronous setup, we may retire the last request before we
complete allocating the next request. As the last request is retired, we
queue a timer to mark the device as idle, and promptly have to execute
ad cancel that timer once we complete allocating the request and need to
keep the device awake. If we rearrange the mark_busy() to occur before
we retire the previous request, we can skip this ping-pong.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 61 ++++++++++++++++-----------------
 1 file changed, 30 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 9eedd33eb524..87b34153945c 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -265,6 +265,13 @@ static int reserve_seqno(struct intel_engine_cs *engine)
 
 static void unreserve_seqno(struct intel_engine_cs *engine)
 {
+	if (!--engine->i915->gt.active_requests) {
+		GEM_BUG_ON(!engine->i915->gt.awake);
+		mod_delayed_work(engine->i915->wq,
+				 &engine->i915->gt.idle_work,
+				 msecs_to_jiffies(100));
+	}
+
 	GEM_BUG_ON(!engine->timeline->inflight_seqnos);
 	engine->timeline->inflight_seqnos--;
 }
@@ -333,12 +340,6 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	list_del_init(&request->link);
 	spin_unlock_irq(&engine->timeline->lock);
 
-	if (!--request->i915->gt.active_requests) {
-		GEM_BUG_ON(!request->i915->gt.awake);
-		mod_delayed_work(request->i915->wq,
-				 &request->i915->gt.idle_work,
-				 msecs_to_jiffies(100));
-	}
 	unreserve_seqno(request->engine);
 	advance_ring(request);
 
@@ -540,6 +541,26 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
+static void mark_busy(struct drm_i915_private *i915)
+{
+	if (i915->gt.awake)
+		return;
+
+	GEM_BUG_ON(!i915->gt.active_requests);
+
+	intel_runtime_pm_get_noresume(i915);
+	i915->gt.awake = true;
+
+	intel_enable_gt_powersave(i915);
+	i915_update_gfx_val(i915);
+	if (INTEL_GEN(i915) >= 6)
+		gen6_rps_busy(i915);
+
+	queue_delayed_work(i915->wq,
+			   &i915->gt.retire_work,
+			   round_jiffies_up_relative(HZ));
+}
+
 /**
  * i915_gem_request_alloc - allocate a request structure
  *
@@ -579,6 +600,9 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	if (ret)
 		goto err_unpin;
 
+	if (!dev_priv->gt.active_requests++)
+		mark_busy(dev_priv);
+
 	/* Move the oldest request to the slab-cache (if not in use!) */
 	req = list_first_entry_or_null(&engine->timeline->requests,
 				       typeof(*req), link);
@@ -863,28 +887,6 @@ i915_gem_request_await_object(struct drm_i915_gem_request *to,
 	return ret;
 }
 
-static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	if (dev_priv->gt.awake)
-		return;
-
-	GEM_BUG_ON(!dev_priv->gt.active_requests);
-
-	intel_runtime_pm_get_noresume(dev_priv);
-	dev_priv->gt.awake = true;
-
-	intel_enable_gt_powersave(dev_priv);
-	i915_update_gfx_val(dev_priv);
-	if (INTEL_GEN(dev_priv) >= 6)
-		gen6_rps_busy(dev_priv);
-
-	queue_delayed_work(dev_priv->wq,
-			   &dev_priv->gt.retire_work,
-			   round_jiffies_up_relative(HZ));
-}
-
 /*
  * NB: This function is not allowed to fail. Doing so would mean the the
  * request is not being tracked for completion but the work itself is
@@ -966,9 +968,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	list_add_tail(&request->ring_link, &ring->request_list);
 	request->emitted_jiffies = jiffies;
 
-	if (!request->i915->gt.active_requests++)
-		i915_gem_mark_busy(engine);
-
 	/* Let the backend know a new request has arrived that may need
 	 * to adjust the existing execution schedule due to a high priority
 	 * request - i.e. we may want to preempt the current request in order
-- 
2.13.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs
  2017-08-16  8:52 [PATCH 1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs Chris Wilson
                   ` (5 preceding siblings ...)
  2017-08-16  8:52 ` [PATCH 7/7] drm/i915: Mark the GT as busy before idling the previous request Chris Wilson
@ 2017-08-16  9:10 ` Patchwork
  2017-08-17 15:36 ` ✓ Fi.CI.BAT: success for series starting with [1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs (rev2) Patchwork
  2017-08-18 11:07 ` [PATCH 1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs Mika Kuoppala
  8 siblings, 0 replies; 19+ messages in thread
From: Patchwork @ 2017-08-16  9:10 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs
URL   : https://patchwork.freedesktop.org/series/28859/
State : success

== Summary ==

Series 28859v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/28859/revisions/1/mbox/

Test gem_exec_flush:
        Subgroup basic-batch-kernel-default-uc:
                pass       -> FAIL       (fi-snb-2600) fdo#100007
Test kms_flip:
        Subgroup basic-flip-vs-modeset:
                pass       -> SKIP       (fi-skl-x1585l) fdo#101781
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                pass       -> DMESG-WARN (fi-byt-n2820) fdo#101705

fdo#100007 https://bugs.freedesktop.org/show_bug.cgi?id=100007
fdo#101781 https://bugs.freedesktop.org/show_bug.cgi?id=101781
fdo#101705 https://bugs.freedesktop.org/show_bug.cgi?id=101705

fi-bdw-5557u     total:279  pass:268  dwarn:0   dfail:0   fail:0   skip:11  time:450s
fi-bdw-gvtdvm    total:279  pass:265  dwarn:0   dfail:0   fail:0   skip:14  time:435s
fi-blb-e6850     total:279  pass:224  dwarn:1   dfail:0   fail:0   skip:54  time:361s
fi-bsw-n3050     total:279  pass:243  dwarn:0   dfail:0   fail:0   skip:36  time:549s
fi-bxt-j4205     total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:514s
fi-byt-j1900     total:279  pass:254  dwarn:1   dfail:0   fail:0   skip:24  time:524s
fi-byt-n2820     total:279  pass:250  dwarn:1   dfail:0   fail:0   skip:28  time:518s
fi-glk-2a        total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:612s
fi-hsw-4770      total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:442s
fi-hsw-4770r     total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:419s
fi-ilk-650       total:279  pass:229  dwarn:0   dfail:0   fail:0   skip:50  time:418s
fi-ivb-3520m     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:496s
fi-ivb-3770      total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:479s
fi-kbl-7500u     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:478s
fi-kbl-7560u     total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:595s
fi-kbl-r         total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:595s
fi-pnv-d510      total:279  pass:223  dwarn:1   dfail:0   fail:0   skip:55  time:524s
fi-skl-6260u     total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:459s
fi-skl-6700k     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:471s
fi-skl-6770hq    total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:483s
fi-skl-gvtdvm    total:279  pass:266  dwarn:0   dfail:0   fail:0   skip:13  time:438s
fi-skl-x1585l    total:279  pass:268  dwarn:0   dfail:0   fail:0   skip:11  time:473s
fi-snb-2520m     total:279  pass:251  dwarn:0   dfail:0   fail:0   skip:28  time:550s
fi-snb-2600      total:279  pass:249  dwarn:0   dfail:0   fail:1   skip:29  time:411s

5118c7fa9c878351d1ed17a36b97966f68da72df drm-tip: 2017y-08m-15d-23h-58m-24s UTC integration manifest
9f7d5d966ef8 drm/i915: Mark the GT as busy before idling the previous request
7554109c76df drm/i915: Trivial grammar fix s/opt of/opt out of/ in comment
ae5db1722e6e drm/i915: Replace execbuf vma ht with an idr
1e7a576abe5f drm/i915: Simplify eb_lookup_vmas()
174eb18ee1a4 drm/i915: Convert execbuf to use struct-of-array packing for critical fields
1d866a140a9b drm/i915: Check context status before looking up our obj/vma
100336bac12a drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_5413/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 7/7] drm/i915: Mark the GT as busy before idling the previous request
  2017-08-16  8:52 ` [PATCH 7/7] drm/i915: Mark the GT as busy before idling the previous request Chris Wilson
@ 2017-08-17 13:38   ` Joonas Lahtinen
  2017-08-17 14:47     ` [PATCH v2] " Chris Wilson
  0 siblings, 1 reply; 19+ messages in thread
From: Joonas Lahtinen @ 2017-08-17 13:38 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Wed, 2017-08-16 at 09:52 +0100, Chris Wilson wrote:
> In a synchronous setup, we may retire the last request before we
> complete allocating the next request. As the last request is retired, we
> queue a timer to mark the device as idle, and promptly have to execute
> ad cancel that timer once we complete allocating the request and need to
> keep the device awake. If we rearrange the mark_busy() to occur before
> we retire the previous request, we can skip this ping-pong.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

<SNIP>

> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -265,6 +265,13 @@ static int reserve_seqno(struct intel_engine_cs *engine)
>  
>  static void unreserve_seqno(struct intel_engine_cs *engine)
>  {
> +	if (!--engine->i915->gt.active_requests) {
> +		GEM_BUG_ON(!engine->i915->gt.awake);
> +		mod_delayed_work(engine->i915->wq,
> +				 &engine->i915->gt.idle_work,
> +				 msecs_to_jiffies(100));
> +	}

This function could use a better name, now it seems to tightly tie to
seqno only, and idle work is very unexpected. How about just
unreserve_engine vs. reserve_engine?

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 6/7] drm/i915: Trivial grammar fix s/opt of/opt out of/ in comment
  2017-08-16  8:52 ` [PATCH 6/7] drm/i915: Trivial grammar fix s/opt of/opt out of/ in comment Chris Wilson
@ 2017-08-17 13:38   ` Joonas Lahtinen
  0 siblings, 0 replies; 19+ messages in thread
From: Joonas Lahtinen @ 2017-08-17 13:38 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Wed, 2017-08-16 at 09:52 +0100, Chris Wilson wrote:
> The word out was dropped from the sentence across the line break, put it
> back.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/7] drm/i915: Check context status before looking up our obj/vma
  2017-08-16  8:52 ` [PATCH 2/7] drm/i915: Check context status before looking up our obj/vma Chris Wilson
@ 2017-08-17 14:10   ` Mika Kuoppala
  2017-08-17 15:43     ` Chris Wilson
  0 siblings, 1 reply; 19+ messages in thread
From: Mika Kuoppala @ 2017-08-17 14:10 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Since we keep the context around across the slow lookup where we may
> drop the struct_mutex, we should double check that the context is still
> valid upon reacquisition.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 ++++++-------
>  1 file changed, 6 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 359d5dc6d8df..044fb1205554 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -679,13 +679,6 @@ static int eb_select_context(struct i915_execbuffer *eb)
>  	if (unlikely(!ctx))
>  		return -ENOENT;
>  
> -	if (unlikely(i915_gem_context_is_banned(ctx))) {
> -		DRM_DEBUG("Context %u tried to submit while banned\n",
> -			  ctx->user_handle);
> -		i915_gem_context_put(ctx);
> -		return -EIO;
> -	}
> -
>  	eb->ctx = ctx;
>  	eb->vm = ctx->ppgtt ? &ctx->ppgtt->base : &eb->i915->ggtt.base;
>  
> @@ -707,6 +700,12 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
>  	int slow_pass = -1;
>  	int err;
>  
> +	if (unlikely(i915_gem_context_is_closed(eb->ctx)))
> +		return -ENOENT;
> +
> +	if (unlikely(i915_gem_context_is_banned(eb->ctx)))
> +		return -EIO;
> +

We are referring the lut before the context has been validated.
Not that it matters but for style, please consider assigning
the lut after the context check.

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

>  	INIT_LIST_HEAD(&eb->relocs);
>  	INIT_LIST_HEAD(&eb->unbound);
>  
> -- 
> 2.13.3
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH v2] drm/i915: Mark the GT as busy before idling the previous request
  2017-08-17 13:38   ` Joonas Lahtinen
@ 2017-08-17 14:47     ` Chris Wilson
  2017-08-18  9:11       ` Joonas Lahtinen
  0 siblings, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2017-08-17 14:47 UTC (permalink / raw)
  To: intel-gfx

In a synchronous setup, we may retire the last request before we
complete allocating the next request. As the last request is retired, we
queue a timer to mark the device as idle, and promptly have to execute
ad cancel that timer once we complete allocating the request and need to
keep the device awake. If we rearrange the mark_busy() to occur before
we retire the previous request, we can skip this ping-pong.

v2: Joonas pointed out that unreserve_seqno() was now doing more than
doing seqno handling and should be renamed to reflect its wider purpose.
That also highlighted the new asymmetry with reserve_seqno(), so fixup
that and rename both to [un]reserve_seqno().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 87 +++++++++++++++++----------------
 1 file changed, 44 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 9eedd33eb524..5716b6d78e8e 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -244,27 +244,59 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
 	return reset_all_global_seqno(dev_priv, seqno - 1);
 }
 
-static int reserve_seqno(struct intel_engine_cs *engine)
+static void mark_busy(struct drm_i915_private *i915)
 {
+	if (i915->gt.awake)
+		return;
+
+	GEM_BUG_ON(!i915->gt.active_requests);
+
+	intel_runtime_pm_get_noresume(i915);
+	i915->gt.awake = true;
+
+	intel_enable_gt_powersave(i915);
+	i915_update_gfx_val(i915);
+	if (INTEL_GEN(i915) >= 6)
+		gen6_rps_busy(i915);
+
+	queue_delayed_work(i915->wq,
+			   &i915->gt.retire_work,
+			   round_jiffies_up_relative(HZ));
+}
+
+static int reserve_engine(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
 	u32 active = ++engine->timeline->inflight_seqnos;
 	u32 seqno = engine->timeline->seqno;
 	int ret;
 
 	/* Reservation is fine until we need to wrap around */
-	if (likely(!add_overflows(seqno, active)))
-		return 0;
-
-	ret = reset_all_global_seqno(engine->i915, 0);
-	if (ret) {
-		engine->timeline->inflight_seqnos--;
-		return ret;
+	if (unlikely(add_overflows(seqno, active))) {
+		ret = reset_all_global_seqno(i915, 0);
+		if (ret) {
+			engine->timeline->inflight_seqnos--;
+			return ret;
+		}
 	}
 
+	if (!i915->gt.active_requests++)
+		mark_busy(i915);
+
 	return 0;
 }
 
-static void unreserve_seqno(struct intel_engine_cs *engine)
+static void unreserve_engine(struct intel_engine_cs *engine)
 {
+	struct drm_i915_private *i915 = engine->i915;
+
+	if (!--i915->gt.active_requests) {
+		GEM_BUG_ON(!i915->gt.awake);
+		mod_delayed_work(i915->wq,
+				 &i915->gt.idle_work,
+				 msecs_to_jiffies(100));
+	}
+
 	GEM_BUG_ON(!engine->timeline->inflight_seqnos);
 	engine->timeline->inflight_seqnos--;
 }
@@ -333,13 +365,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	list_del_init(&request->link);
 	spin_unlock_irq(&engine->timeline->lock);
 
-	if (!--request->i915->gt.active_requests) {
-		GEM_BUG_ON(!request->i915->gt.awake);
-		mod_delayed_work(request->i915->wq,
-				 &request->i915->gt.idle_work,
-				 msecs_to_jiffies(100));
-	}
-	unreserve_seqno(request->engine);
+	unreserve_engine(request->engine);
 	advance_ring(request);
 
 	free_capture_list(request);
@@ -575,7 +601,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		return ERR_CAST(ring);
 	GEM_BUG_ON(!ring);
 
-	ret = reserve_seqno(engine);
+	ret = reserve_engine(engine);
 	if (ret)
 		goto err_unpin;
 
@@ -681,7 +707,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 
 	kmem_cache_free(dev_priv->requests, req);
 err_unreserve:
-	unreserve_seqno(engine);
+	unreserve_engine(engine);
 err_unpin:
 	engine->context_unpin(engine, ctx);
 	return ERR_PTR(ret);
@@ -863,28 +889,6 @@ i915_gem_request_await_object(struct drm_i915_gem_request *to,
 	return ret;
 }
 
-static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	if (dev_priv->gt.awake)
-		return;
-
-	GEM_BUG_ON(!dev_priv->gt.active_requests);
-
-	intel_runtime_pm_get_noresume(dev_priv);
-	dev_priv->gt.awake = true;
-
-	intel_enable_gt_powersave(dev_priv);
-	i915_update_gfx_val(dev_priv);
-	if (INTEL_GEN(dev_priv) >= 6)
-		gen6_rps_busy(dev_priv);
-
-	queue_delayed_work(dev_priv->wq,
-			   &dev_priv->gt.retire_work,
-			   round_jiffies_up_relative(HZ));
-}
-
 /*
  * NB: This function is not allowed to fail. Doing so would mean the the
  * request is not being tracked for completion but the work itself is
@@ -966,9 +970,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	list_add_tail(&request->ring_link, &ring->request_list);
 	request->emitted_jiffies = jiffies;
 
-	if (!request->i915->gt.active_requests++)
-		i915_gem_mark_busy(engine);
-
 	/* Let the backend know a new request has arrived that may need
 	 * to adjust the existing execution schedule due to a high priority
 	 * request - i.e. we may want to preempt the current request in order
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs (rev2)
  2017-08-16  8:52 [PATCH 1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs Chris Wilson
                   ` (6 preceding siblings ...)
  2017-08-16  9:10 ` ✓ Fi.CI.BAT: success for series starting with [1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs Patchwork
@ 2017-08-17 15:36 ` Patchwork
  2017-08-18 11:40   ` Chris Wilson
  2017-08-18 11:07 ` [PATCH 1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs Mika Kuoppala
  8 siblings, 1 reply; 19+ messages in thread
From: Patchwork @ 2017-08-17 15:36 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs (rev2)
URL   : https://patchwork.freedesktop.org/series/28859/
State : success

== Summary ==

Series 28859v2 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/28859/revisions/2/mbox/

fi-bdw-5557u     total:279  pass:268  dwarn:0   dfail:0   fail:0   skip:11  time:455s
fi-bdw-gvtdvm    total:279  pass:265  dwarn:0   dfail:0   fail:0   skip:14  time:445s
fi-bsw-n3050     total:279  pass:243  dwarn:0   dfail:0   fail:0   skip:36  time:556s
fi-bxt-j4205     total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:526s
fi-byt-j1900     total:279  pass:254  dwarn:1   dfail:0   fail:0   skip:24  time:526s
fi-byt-n2820     total:279  pass:251  dwarn:0   dfail:0   fail:0   skip:28  time:512s
fi-glk-2a        total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:610s
fi-hsw-4770      total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:448s
fi-hsw-4770r     total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:423s
fi-ilk-650       total:279  pass:229  dwarn:0   dfail:0   fail:0   skip:50  time:419s
fi-ivb-3520m     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:507s
fi-ivb-3770      total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:477s
fi-kbl-7500u     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:475s
fi-kbl-7560u     total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:595s
fi-kbl-r         total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:604s
fi-pnv-d510      total:279  pass:223  dwarn:1   dfail:0   fail:0   skip:55  time:527s
fi-skl-6260u     total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:471s
fi-skl-6700k     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:466s
fi-skl-6770hq    total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:492s
fi-skl-gvtdvm    total:279  pass:266  dwarn:0   dfail:0   fail:0   skip:13  time:443s
fi-skl-x1585l    total:279  pass:268  dwarn:0   dfail:0   fail:0   skip:11  time:484s
fi-snb-2520m     total:279  pass:251  dwarn:0   dfail:0   fail:0   skip:28  time:547s
fi-snb-2600      total:279  pass:250  dwarn:0   dfail:0   fail:0   skip:29  time:408s

e1b18fbd4daecb1c1cf31ca101f64e29a8933bcf drm-tip: 2017y-08m-17d-14h-58m-23s UTC integration manifest
7c0a37b11bd0 drm/i915: Mark the GT as busy before idling the previous request
0e4845519fbb drm/i915: Trivial grammar fix s/opt of/opt out of/ in comment
6928515da09d drm/i915: Replace execbuf vma ht with an idr
dca8988c612a drm/i915: Simplify eb_lookup_vmas()
84ca392b85e3 drm/i915: Convert execbuf to use struct-of-array packing for critical fields
a9902e0bff38 drm/i915: Check context status before looking up our obj/vma
155bcaa01da6 drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_5429/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/7] drm/i915: Check context status before looking up our obj/vma
  2017-08-17 14:10   ` Mika Kuoppala
@ 2017-08-17 15:43     ` Chris Wilson
  0 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2017-08-17 15:43 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2017-08-17 15:10:02)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Since we keep the context around across the slow lookup where we may
> > drop the struct_mutex, we should double check that the context is still
> > valid upon reacquisition.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 ++++++-------
> >  1 file changed, 6 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > index 359d5dc6d8df..044fb1205554 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > @@ -679,13 +679,6 @@ static int eb_select_context(struct i915_execbuffer *eb)
> >       if (unlikely(!ctx))
> >               return -ENOENT;
> >  
> > -     if (unlikely(i915_gem_context_is_banned(ctx))) {
> > -             DRM_DEBUG("Context %u tried to submit while banned\n",
> > -                       ctx->user_handle);
> > -             i915_gem_context_put(ctx);
> > -             return -EIO;
> > -     }
> > -
> >       eb->ctx = ctx;
> >       eb->vm = ctx->ppgtt ? &ctx->ppgtt->base : &eb->i915->ggtt.base;
> >  
> > @@ -707,6 +700,12 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
> >       int slow_pass = -1;
> >       int err;
> >  
> > +     if (unlikely(i915_gem_context_is_closed(eb->ctx)))
> > +             return -ENOENT;
> > +
> > +     if (unlikely(i915_gem_context_is_banned(eb->ctx)))
> > +             return -EIO;
> > +
> 
> We are referring the lut before the context has been validated.
> Not that it matters but for style, please consider assigning
> the lut after the context check.

~o~ Not feeling it. If it was checking for an invalid pointer, then yes
rearranging the code to avoid the appearance of the early dereference
makes sense, but as it is we are just creating an alias for part of the
ctx. Should look at whether gcc likes a few more locals...
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2] drm/i915: Mark the GT as busy before idling the previous request
  2017-08-17 14:47     ` [PATCH v2] " Chris Wilson
@ 2017-08-18  9:11       ` Joonas Lahtinen
  0 siblings, 0 replies; 19+ messages in thread
From: Joonas Lahtinen @ 2017-08-18  9:11 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Thu, 2017-08-17 at 15:47 +0100, Chris Wilson wrote:
> In a synchronous setup, we may retire the last request before we
> complete allocating the next request. As the last request is retired, we
> queue a timer to mark the device as idle, and promptly have to execute
> ad cancel that timer once we complete allocating the request and need to
> keep the device awake. If we rearrange the mark_busy() to occur before
> we retire the previous request, we can skip this ping-pong.
> 
> v2: Joonas pointed out that unreserve_seqno() was now doing more than
> doing seqno handling and should be renamed to reflect its wider purpose.
> That also highlighted the new asymmetry with reserve_seqno(), so fixup
> that and rename both to [un]reserve_seqno().
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs
  2017-08-16  8:52 [PATCH 1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs Chris Wilson
                   ` (7 preceding siblings ...)
  2017-08-17 15:36 ` ✓ Fi.CI.BAT: success for series starting with [1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs (rev2) Patchwork
@ 2017-08-18 11:07 ` Mika Kuoppala
  2017-08-18 11:17   ` Chris Wilson
  8 siblings, 1 reply; 19+ messages in thread
From: Mika Kuoppala @ 2017-08-18 11:07 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: drm-intel-fixes

Chris Wilson <chris@chris-wilson.co.uk> writes:

> MI_STORE_DWORD_IMM just doesn't work on the video decode engine under
> Sandybridge, so refrain from using it. Then switch the selftests over to
> using the now common test prior to using MI_STORE_DWORD_IMM.
>
> Fixes: 7dd4f6729f92 ("drm/i915: Async GPU relocation processing")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: <drm-intel-fixes@lists.freedesktop.org> # v4.13-rc1+
> ---
>  drivers/gpu/drm/i915/i915_drv.h                     |  7 +++++++
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c          |  8 ++++----
>  drivers/gpu/drm/i915/i915_selftest.h                |  2 --
>  drivers/gpu/drm/i915/intel_ringbuffer.h             | 12 ++++++++++++
>  drivers/gpu/drm/i915/selftests/i915_gem_coherency.c |  2 +-
>  drivers/gpu/drm/i915/selftests/i915_gem_context.c   |  6 ++++--
>  drivers/gpu/drm/i915/selftests/intel_hangcheck.c    | 18 ++++++++++++------
>  7 files changed, 40 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 6c25c8520c87..620e53bd705a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -4327,4 +4327,11 @@ int remap_io_mapping(struct vm_area_struct *vma,
>  		     unsigned long addr, unsigned long pfn, unsigned long size,
>  		     struct io_mapping *iomap);
>  
> +static inline bool
> +intel_engine_can_store_dword(struct intel_engine_cs *engine)
> +{
> +	return __intel_engine_can_store_dword(INTEL_GEN(engine->i915),
> +					      engine->class);
> +}
> +
>  #endif
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 8e8bc7aefd9c..359d5dc6d8df 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1268,7 +1268,9 @@ relocate_entry(struct i915_vma *vma,
>  
>  	if (!eb->reloc_cache.vaddr &&
>  	    (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
> -	     !reservation_object_test_signaled_rcu(vma->resv, true))) {
> +	     !reservation_object_test_signaled_rcu(vma->resv, true)) &&
> +	    __intel_engine_can_store_dword(eb->reloc_cache.gen,
> +					   eb->engine->class)) {
>  		const unsigned int gen = eb->reloc_cache.gen;

If you lift this to upper scope, you can make the check little
shorter. But incase you are avoiding the assignment to the latest,
i am not insisting.

There is engine in the eb so could you elaborate that
do we get by not doig intel_engine_can_store_dword(eb->engine)?

-Mika

>  		unsigned int len;
>  		u32 *batch;
> @@ -1278,10 +1280,8 @@ relocate_entry(struct i915_vma *vma,
>  			len = offset & 7 ? 8 : 5;
>  		else if (gen >= 4)
>  			len = 4;
> -		else if (gen >= 3)
> +		else
>  			len = 3;
> -		else /* On gen2 MI_STORE_DWORD_IMM uses a physical address */
> -			goto repeat;
>  
>  		batch = reloc_gpu(eb, vma, len);
>  		if (IS_ERR(batch))
> diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h
> index 9d7d86f1733d..78e1a1b168ff 100644
> --- a/drivers/gpu/drm/i915/i915_selftest.h
> +++ b/drivers/gpu/drm/i915/i915_selftest.h
> @@ -101,6 +101,4 @@ bool __igt_timeout(unsigned long timeout, const char *fmt, ...);
>  #define igt_timeout(t, fmt, ...) \
>  	__igt_timeout((t), KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
>  
> -#define igt_can_mi_store_dword_imm(D) (INTEL_GEN(D) > 2)
> -
>  #endif /* !__I915_SELFTEST_H__ */
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index d33c93444c0d..02d8974bf9ab 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -735,4 +735,16 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
>  void intel_engines_mark_idle(struct drm_i915_private *i915);
>  void intel_engines_reset_default_submission(struct drm_i915_private *i915);
>  
> +static inline bool
> +__intel_engine_can_store_dword(unsigned int gen, unsigned int class)
> +{
> +	if (gen <= 2)
> +		return false; /* uses physical not virtual addresses */
> +
> +	if (gen == 6 && class == VIDEO_DECODE_CLASS)
> +		return false; /* b0rked */
> +
> +	return true;
> +}
> +
>  #endif /* _INTEL_RINGBUFFER_H_ */
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> index 95d4aebc0181..35d778d70626 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> @@ -241,7 +241,7 @@ static bool always_valid(struct drm_i915_private *i915)
>  
>  static bool needs_mi_store_dword(struct drm_i915_private *i915)
>  {
> -	return igt_can_mi_store_dword_imm(i915);
> +	return intel_engine_can_store_dword(i915->engine[RCS]);
>  }
>  
>  static const struct igt_coherency_mode {
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 12b85b3278cd..fb0a58fc8348 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -38,8 +38,6 @@ gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value)
>  	u32 *cmd;
>  	int err;
>  
> -	GEM_BUG_ON(!igt_can_mi_store_dword_imm(vma->vm->i915));
> -
>  	size = (4 * count + 1) * sizeof(u32);
>  	size = round_up(size, PAGE_SIZE);
>  	obj = i915_gem_object_create_internal(vma->vm->i915, size);
> @@ -123,6 +121,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
>  	int err;
>  
>  	GEM_BUG_ON(obj->base.size > vm->total);
> +	GEM_BUG_ON(!intel_engine_can_store_dword(engine));
>  
>  	vma = i915_vma_instance(obj, vm, NULL);
>  	if (IS_ERR(vma))
> @@ -359,6 +358,9 @@ static int igt_ctx_exec(void *arg)
>  		}
>  
>  		for_each_engine(engine, i915, id) {
> +			if (!intel_engine_can_store_dword(engine))
> +				continue;
> +
>  			if (!obj) {
>  				obj = create_test_object(ctx, file, &objects);
>  				if (IS_ERR(obj)) {
> diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> index 208b34e864fb..02e52a146ed8 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> @@ -253,9 +253,6 @@ static int igt_hang_sanitycheck(void *arg)
>  
>  	/* Basic check that we can execute our hanging batch */
>  
> -	if (!igt_can_mi_store_dword_imm(i915))
> -		return 0;
> -
>  	mutex_lock(&i915->drm.struct_mutex);
>  	err = hang_init(&h, i915);
>  	if (err)
> @@ -264,6 +261,9 @@ static int igt_hang_sanitycheck(void *arg)
>  	for_each_engine(engine, i915, id) {
>  		long timeout;
>  
> +		if (!intel_engine_can_store_dword(engine))
> +			continue;
> +
>  		rq = hang_create_request(&h, engine, i915->kernel_context);
>  		if (IS_ERR(rq)) {
>  			err = PTR_ERR(rq);
> @@ -599,6 +599,9 @@ static int igt_wait_reset(void *arg)
>  	long timeout;
>  	int err;
>  
> +	if (!intel_engine_can_store_dword(i915->engine[RCS]))
> +		return 0;
> +
>  	/* Check that we detect a stuck waiter and issue a reset */
>  
>  	global_reset_lock(i915);
> @@ -664,9 +667,6 @@ static int igt_reset_queue(void *arg)
>  
>  	/* Check that we replay pending requests following a hang */
>  
> -	if (!igt_can_mi_store_dword_imm(i915))
> -		return 0;
> -
>  	global_reset_lock(i915);
>  
>  	mutex_lock(&i915->drm.struct_mutex);
> @@ -679,6 +679,9 @@ static int igt_reset_queue(void *arg)
>  		IGT_TIMEOUT(end_time);
>  		unsigned int count;
>  
> +		if (!intel_engine_can_store_dword(engine))
> +			continue;
> +
>  		prev = hang_create_request(&h, engine, i915->kernel_context);
>  		if (IS_ERR(prev)) {
>  			err = PTR_ERR(prev);
> @@ -784,6 +787,9 @@ static int igt_handle_error(void *arg)
>  	if (!intel_has_reset_engine(i915))
>  		return 0;
>  
> +	if (!intel_engine_can_store_dword(i915->engine[RCS]))
> +		return 0;
> +
>  	mutex_lock(&i915->drm.struct_mutex);
>  
>  	err = hang_init(&h, i915);
> -- 
> 2.13.3
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs
  2017-08-18 11:07 ` [PATCH 1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs Mika Kuoppala
@ 2017-08-18 11:17   ` Chris Wilson
  2017-08-18 11:23     ` Mika Kuoppala
  0 siblings, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2017-08-18 11:17 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx; +Cc: drm-intel-fixes

Quoting Mika Kuoppala (2017-08-18 12:07:20)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> >       if (!eb->reloc_cache.vaddr &&
> >           (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
> > -          !reservation_object_test_signaled_rcu(vma->resv, true))) {
> > +          !reservation_object_test_signaled_rcu(vma->resv, true)) &&
> > +         __intel_engine_can_store_dword(eb->reloc_cache.gen,
> > +                                        eb->engine->class)) {
> >               const unsigned int gen = eb->reloc_cache.gen;
> 
> If you lift this to upper scope, you can make the check little
> shorter. But incase you are avoiding the assignment to the latest,
> i am not insisting.
> 
> There is engine in the eb so could you elaborate that
> do we get by not doig intel_engine_can_store_dword(eb->engine)?

I'm just partial to avoiding the pointer chasing. I want to make sure
that the compiler/cpu see that we are reusing the gen, and prefer to
keep the gen that we do use next to its friends inside eb->reloc_cache.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs
  2017-08-18 11:17   ` Chris Wilson
@ 2017-08-18 11:23     ` Mika Kuoppala
  0 siblings, 0 replies; 19+ messages in thread
From: Mika Kuoppala @ 2017-08-18 11:23 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: drm-intel-fixes

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2017-08-18 12:07:20)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> >       if (!eb->reloc_cache.vaddr &&
>> >           (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
>> > -          !reservation_object_test_signaled_rcu(vma->resv, true))) {
>> > +          !reservation_object_test_signaled_rcu(vma->resv, true)) &&
>> > +         __intel_engine_can_store_dword(eb->reloc_cache.gen,
>> > +                                        eb->engine->class)) {
>> >               const unsigned int gen = eb->reloc_cache.gen;
>> 
>> If you lift this to upper scope, you can make the check little
>> shorter. But incase you are avoiding the assignment to the latest,
>> i am not insisting.
>> 
>> There is engine in the eb so could you elaborate that
>> do we get by not doig intel_engine_can_store_dword(eb->engine)?
>
> I'm just partial to avoiding the pointer chasing. I want to make sure
> that the compiler/cpu see that we are reusing the gen, and prefer to
> keep the gen that we do use next to its friends inside
> eb->reloc_cache.

Ok.

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: ✓ Fi.CI.BAT: success for series starting with [1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs (rev2)
  2017-08-17 15:36 ` ✓ Fi.CI.BAT: success for series starting with [1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs (rev2) Patchwork
@ 2017-08-18 11:40   ` Chris Wilson
  0 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2017-08-18 11:40 UTC (permalink / raw)
  To: Patchwork; +Cc: intel-gfx

Quoting Patchwork (2017-08-17 16:36:15)
> == Series Details ==
> 
> Series: series starting with [1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs (rev2)
> URL   : https://patchwork.freedesktop.org/series/28859/
> State : success
> 
> == Summary ==
> 
> Series 28859v2 Series without cover letter
> https://patchwork.freedesktop.org/api/1.0/series/28859/revisions/2/mbox/
> 
> fi-bdw-5557u     total:279  pass:268  dwarn:0   dfail:0   fail:0   skip:11  time:455s
> fi-bdw-gvtdvm    total:279  pass:265  dwarn:0   dfail:0   fail:0   skip:14  time:445s
> fi-bsw-n3050     total:279  pass:243  dwarn:0   dfail:0   fail:0   skip:36  time:556s
> fi-bxt-j4205     total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:526s
> fi-byt-j1900     total:279  pass:254  dwarn:1   dfail:0   fail:0   skip:24  time:526s
> fi-byt-n2820     total:279  pass:251  dwarn:0   dfail:0   fail:0   skip:28  time:512s
> fi-glk-2a        total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:610s
> fi-hsw-4770      total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:448s
> fi-hsw-4770r     total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:423s
> fi-ilk-650       total:279  pass:229  dwarn:0   dfail:0   fail:0   skip:50  time:419s
> fi-ivb-3520m     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:507s
> fi-ivb-3770      total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:477s
> fi-kbl-7500u     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:475s
> fi-kbl-7560u     total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:595s
> fi-kbl-r         total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:604s
> fi-pnv-d510      total:279  pass:223  dwarn:1   dfail:0   fail:0   skip:55  time:527s
> fi-skl-6260u     total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:471s
> fi-skl-6700k     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:466s
> fi-skl-6770hq    total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:492s
> fi-skl-gvtdvm    total:279  pass:266  dwarn:0   dfail:0   fail:0   skip:13  time:443s
> fi-skl-x1585l    total:279  pass:268  dwarn:0   dfail:0   fail:0   skip:11  time:484s
> fi-snb-2520m     total:279  pass:251  dwarn:0   dfail:0   fail:0   skip:28  time:547s
> fi-snb-2600      total:279  pass:250  dwarn:0   dfail:0   fail:0   skip:29  time:408s
> 
> e1b18fbd4daecb1c1cf31ca101f64e29a8933bcf drm-tip: 2017y-08m-17d-14h-58m-23s UTC integration manifest
> 7c0a37b11bd0 drm/i915: Mark the GT as busy before idling the previous request
> 0e4845519fbb drm/i915: Trivial grammar fix s/opt of/opt out of/ in comment
> 6928515da09d drm/i915: Replace execbuf vma ht with an idr
> dca8988c612a drm/i915: Simplify eb_lookup_vmas()
> 84ca392b85e3 drm/i915: Convert execbuf to use struct-of-array packing for critical fields
> a9902e0bff38 drm/i915: Check context status before looking up our obj/vma
> 155bcaa01da6 drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs

Thank you everyone for the review, it's the last bit of execbuf tuning I
have planned for a while, so you can feel relieved about that. (Just
hopefully minor fixes like size_t promotion remain.)

Where do we go with execbuf from here? It is still the most prominent
kernel overhead for many clients (although _pageflip_ is a surprising
contender). There's some work we can do to improve scalability of
reservation_object, but I think we have come to the end of the road for
execbuf2 (the last major interface improvement was 2011). Do we care to
hash out a plan for execbuf3? A few suggestions have focused on the use
of ringbuffers for passing commands between userspace and the kernel,
along with more compact instructions that eliminate a lot of redundancy
between execbuf calls; i.e. we try to keep all the preprocessing in
userspace, with the kernel doing the resource checking around execution.
Ideas?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2017-08-18 11:40 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-08-16  8:52 [PATCH 1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs Chris Wilson
2017-08-16  8:52 ` [PATCH 2/7] drm/i915: Check context status before looking up our obj/vma Chris Wilson
2017-08-17 14:10   ` Mika Kuoppala
2017-08-17 15:43     ` Chris Wilson
2017-08-16  8:52 ` [PATCH 3/7] drm/i915: Convert execbuf to use struct-of-array packing for critical fields Chris Wilson
2017-08-16  8:52 ` [PATCH 4/7] drm/i915: Simplify eb_lookup_vmas() Chris Wilson
2017-08-16  8:52 ` [PATCH 5/7] drm/i915: Replace execbuf vma ht with an idr Chris Wilson
2017-08-16  8:52 ` [PATCH 6/7] drm/i915: Trivial grammar fix s/opt of/opt out of/ in comment Chris Wilson
2017-08-17 13:38   ` Joonas Lahtinen
2017-08-16  8:52 ` [PATCH 7/7] drm/i915: Mark the GT as busy before idling the previous request Chris Wilson
2017-08-17 13:38   ` Joonas Lahtinen
2017-08-17 14:47     ` [PATCH v2] " Chris Wilson
2017-08-18  9:11       ` Joonas Lahtinen
2017-08-16  9:10 ` ✓ Fi.CI.BAT: success for series starting with [1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs Patchwork
2017-08-17 15:36 ` ✓ Fi.CI.BAT: success for series starting with [1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs (rev2) Patchwork
2017-08-18 11:40   ` Chris Wilson
2017-08-18 11:07 ` [PATCH 1/7] drm/i915: Don't use MI_STORE_DWORD_IMM on Sandybridge/vcs Mika Kuoppala
2017-08-18 11:17   ` Chris Wilson
2017-08-18 11:23     ` Mika Kuoppala

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).