All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 01/12] drm/i915: Remove kref from i915_sw_fence
@ 2017-05-11 19:59 Chris Wilson
  2017-05-11 19:59 ` [PATCH 02/12] drm/i915: Import the kfence selftests for i915_sw_fence Chris Wilson
                   ` (15 more replies)
  0 siblings, 16 replies; 36+ messages in thread
From: Chris Wilson @ 2017-05-11 19:59 UTC (permalink / raw)
  To: intel-gfx

My original intention was for i915_sw_fence to be the base class and
provide the reference count for the container. This was from starting
with a design to handle async_work. In practice, for i915 we embed
fences into structs which have their own independent reference counting,
making the i915_sw_fence.kref redundant. If we remove the kref, we
remove the i915_sw_fence's ability to free itself and its independence:
it can only exist within a container and must be supplied with a
callback to handle its release.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_sw_fence.c | 55 ++++++++----------------------------
 drivers/gpu/drm/i915/i915_sw_fence.h |  1 -
 2 files changed, 11 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index a277f8eb7beb..a0a690d6627e 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -120,34 +120,6 @@ void i915_sw_fence_fini(struct i915_sw_fence *fence)
 }
 #endif
 
-static void i915_sw_fence_release(struct kref *kref)
-{
-	struct i915_sw_fence *fence = container_of(kref, typeof(*fence), kref);
-
-	WARN_ON(atomic_read(&fence->pending) > 0);
-	debug_fence_destroy(fence);
-
-	if (fence->flags & I915_SW_FENCE_MASK) {
-		__i915_sw_fence_notify(fence, FENCE_FREE);
-	} else {
-		i915_sw_fence_fini(fence);
-		kfree(fence);
-	}
-}
-
-static void i915_sw_fence_put(struct i915_sw_fence *fence)
-{
-	debug_fence_assert(fence);
-	kref_put(&fence->kref, i915_sw_fence_release);
-}
-
-static struct i915_sw_fence *i915_sw_fence_get(struct i915_sw_fence *fence)
-{
-	debug_fence_assert(fence);
-	kref_get(&fence->kref);
-	return fence;
-}
-
 static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
 					struct list_head *continuation)
 {
@@ -202,13 +174,15 @@ static void __i915_sw_fence_complete(struct i915_sw_fence *fence,
 
 	debug_fence_set_state(fence, DEBUG_FENCE_IDLE, DEBUG_FENCE_NOTIFY);
 
-	if (fence->flags & I915_SW_FENCE_MASK &&
-	    __i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE)
+	if (__i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE)
 		return;
 
 	debug_fence_set_state(fence, DEBUG_FENCE_NOTIFY, DEBUG_FENCE_IDLE);
 
 	__i915_sw_fence_wake_up_all(fence, continuation);
+
+	debug_fence_destroy(fence);
+	__i915_sw_fence_notify(fence, FENCE_FREE);
 }
 
 static void i915_sw_fence_complete(struct i915_sw_fence *fence)
@@ -232,33 +206,26 @@ void __i915_sw_fence_init(struct i915_sw_fence *fence,
 			  const char *name,
 			  struct lock_class_key *key)
 {
-	BUG_ON((unsigned long)fn & ~I915_SW_FENCE_MASK);
+	BUG_ON(!fn || (unsigned long)fn & ~I915_SW_FENCE_MASK);
 
 	debug_fence_init(fence);
 
 	__init_waitqueue_head(&fence->wait, name, key);
-	kref_init(&fence->kref);
 	atomic_set(&fence->pending, 1);
 	fence->flags = (unsigned long)fn;
 }
 
-static void __i915_sw_fence_commit(struct i915_sw_fence *fence)
-{
-	i915_sw_fence_complete(fence);
-	i915_sw_fence_put(fence);
-}
-
 void i915_sw_fence_commit(struct i915_sw_fence *fence)
 {
 	debug_fence_activate(fence);
-	__i915_sw_fence_commit(fence);
+	i915_sw_fence_complete(fence);
 }
 
 static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key)
 {
 	list_del(&wq->task_list);
 	__i915_sw_fence_complete(wq->private, key);
-	i915_sw_fence_put(wq->private);
+
 	if (wq->flags & I915_SW_FENCE_FLAG_ALLOC)
 		kfree(wq);
 	return 0;
@@ -353,7 +320,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 	INIT_LIST_HEAD(&wq->task_list);
 	wq->flags = pending;
 	wq->func = i915_sw_fence_wake;
-	wq->private = i915_sw_fence_get(fence);
+	wq->private = fence;
 
 	i915_sw_fence_await(fence);
 
@@ -402,7 +369,7 @@ static void timer_i915_sw_fence_wake(unsigned long data)
 	dma_fence_put(cb->dma);
 	cb->dma = NULL;
 
-	__i915_sw_fence_commit(cb->fence);
+	i915_sw_fence_complete(cb->fence);
 	cb->timer.function = NULL;
 }
 
@@ -413,7 +380,7 @@ static void dma_i915_sw_fence_wake(struct dma_fence *dma,
 
 	del_timer_sync(&cb->timer);
 	if (cb->timer.function)
-		__i915_sw_fence_commit(cb->fence);
+		i915_sw_fence_complete(cb->fence);
 	dma_fence_put(cb->dma);
 
 	kfree(cb);
@@ -440,7 +407,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 		return dma_fence_wait(dma, false);
 	}
 
-	cb->fence = i915_sw_fence_get(fence);
+	cb->fence = fence;
 	i915_sw_fence_await(fence);
 
 	cb->dma = NULL;
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index d31cefbbcc04..1d3b6051daaf 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -23,7 +23,6 @@ struct reservation_object;
 struct i915_sw_fence {
 	wait_queue_head_t wait;
 	unsigned long flags;
-	struct kref kref;
 	atomic_t pending;
 };
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 02/12] drm/i915: Import the kfence selftests for i915_sw_fence
  2017-05-11 19:59 [PATCH 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
@ 2017-05-11 19:59 ` Chris Wilson
  2017-05-15 10:52   ` [PATCH v2] " Chris Wilson
  2017-05-11 19:59 ` [PATCH 03/12] drm/i915: Make ptr_unpack_bits() more function-like Chris Wilson
                   ` (14 subsequent siblings)
  15 siblings, 1 reply; 36+ messages in thread
From: Chris Wilson @ 2017-05-11 19:59 UTC (permalink / raw)
  To: intel-gfx

A long time ago, I wrote some selftests for the struct kfence idea. Now
that we have infrastructure in i915/igt for running kselftests, include
some for i915_sw_fence.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Kconfig.debug                 |  12 +
 drivers/gpu/drm/i915/i915_sw_fence.c               |   7 +-
 .../gpu/drm/i915/selftests/i915_mock_selftests.h   |   1 +
 drivers/gpu/drm/i915/selftests/i915_sw_fence.c     | 577 +++++++++++++++++++++
 4 files changed, 596 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/selftests/i915_sw_fence.c

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index e091809a9a9e..d4860c3a3bf9 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -61,6 +61,18 @@ config DRM_I915_SW_FENCE_DEBUG_OBJECTS
 
           If in doubt, say "N".
 
+config DRM_I915_SW_FENCE_CHECK_DAG
+        bool "Enable additional driver debugging for detecting dependency cycles"
+        depends on DRM_I915
+        default n
+        help
+          Choose this option to turn on extra driver debugging that may affect
+          performance but will catch some internal issues.
+
+          Recommended for driver developers only.
+
+          If in doubt, say "N".
+
 config DRM_I915_SELFTEST
 	bool "Enable selftests upon driver load"
 	depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index a0a690d6627e..474d23c0c0ce 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -12,6 +12,7 @@
 #include <linux/reservation.h>
 
 #include "i915_sw_fence.h"
+#include "i915_selftest.h"
 
 #define I915_SW_FENCE_FLAG_ALLOC BIT(3) /* after WQ_FLAG_* for safety */
 
@@ -274,7 +275,7 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
 	unsigned long flags;
 	bool err;
 
-	if (!IS_ENABLED(CONFIG_I915_SW_FENCE_CHECK_DAG))
+	if (!IS_ENABLED(CONFIG_DRM_I915_SW_FENCE_CHECK_DAG))
 		return false;
 
 	spin_lock_irqsave(&i915_sw_fence_lock, flags);
@@ -490,3 +491,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 
 	return ret;
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_sw_fence.c"
+#endif
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index 76c1f149a0a0..fc74687501ba 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -9,6 +9,7 @@
  * Tests are executed in order by igt/drv_selftest
  */
 selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */
+selftest(fence, i915_sw_fence_mock_selftests)
 selftest(scatterlist, scatterlist_mock_selftests)
 selftest(syncmap, i915_syncmap_mock_selftests)
 selftest(uncore, intel_uncore_mock_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
new file mode 100644
index 000000000000..0e9fa37237b5
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
@@ -0,0 +1,577 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/completion.h>
+#include <linux/delay.h>
+
+#include "../i915_selftest.h"
+
+/*
+ * Notification callback shared by every fence created in these selftests.
+ * It takes no action for either state and always returns NOTIFY_DONE.
+ */
+static int __i915_sw_fence_call
+fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+	switch (state) {
+	case FENCE_COMPLETE:
+		break;
+
+	case FENCE_FREE:
+		/* Leave the fence for the caller to free it after testing */
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+/*
+ * Allocate a fence with kmalloc() and initialise it with fence_notify as
+ * its callback. Returns NULL if the allocation fails; otherwise the caller
+ * releases the fence with free_fence().
+ */
+static struct i915_sw_fence *alloc_fence(void)
+{
+	struct i915_sw_fence *fence;
+
+	fence = kmalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	i915_sw_fence_init(fence, fence_notify);
+	return fence;
+}
+
+/* Tear down and free a fence obtained from alloc_fence(). */
+static void free_fence(struct i915_sw_fence *fence)
+{
+	i915_sw_fence_fini(fence);
+	kfree(fence);
+}
+
+/*
+ * Check the basic life cycle of a single fence: it must not report done
+ * before i915_sw_fence_complete(), must report done afterwards, and must
+ * still report done after i915_sw_fence_wait() returns.
+ *
+ * Returns 0 on success or -EINVAL on the first failed check.
+ */
+static int __test_self(struct i915_sw_fence *fence)
+{
+	if (i915_sw_fence_done(fence))
+		return -EINVAL;
+
+	i915_sw_fence_complete(fence);
+	if (!i915_sw_fence_done(fence))
+		return -EINVAL;
+
+	i915_sw_fence_wait(fence);
+	if (!i915_sw_fence_done(fence))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Subtest wrapper: allocate one fence, run __test_self() on it and free it.
+ * Returns -ENOMEM if the fence cannot be allocated, otherwise the result of
+ * __test_self(). @arg is unused.
+ */
+static int test_self(void *arg)
+{
+	struct i915_sw_fence *fence;
+	int ret;
+
+	/* Test i915_sw_fence signaling and completion testing */
+	fence = alloc_fence();
+	if (!fence)
+		return -ENOMEM;
+
+	ret = __test_self(fence);
+
+	free_fence(fence);
+	return ret;
+}
+
+/*
+ * Check that dependency cycles between fences are rejected with -EINVAL
+ * while acyclic dependencies are still accepted. The test is skipped
+ * (reported as success) unless CONFIG_DRM_I915_SW_FENCE_CHECK_DAG is set.
+ *
+ * Returns 0 on success, -ENOMEM if a fence cannot be allocated, or -EINVAL
+ * if a cycle goes undetected or a valid dependency is refused.
+ * @arg is unused.
+ */
+static int test_dag(void *arg)
+{
+	struct i915_sw_fence *A, *B, *C;
+	int ret = -EINVAL;
+
+	/* Test detection of cycles within the i915_sw_fence graphs */
+	if (!IS_ENABLED(CONFIG_DRM_I915_SW_FENCE_CHECK_DAG))
+		return 0;
+
+	A = alloc_fence();
+	if (!A)
+		return -ENOMEM;
+
+	if (i915_sw_fence_await_sw_fence_gfp(A, A, GFP_KERNEL) != -EINVAL) {
+		pr_err("recursive cycle not detected (AA)\n");
+		goto err_A;
+	}
+
+	B = alloc_fence();
+	if (!B) {
+		ret = -ENOMEM;
+		goto err_A;
+	}
+
+	/* Set up A -> B, then B -> A must be refused */
+	i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL);
+	if (i915_sw_fence_await_sw_fence_gfp(B, A, GFP_KERNEL) != -EINVAL) {
+		pr_err("single depth cycle not detected (BAB)\n");
+		goto err_B;
+	}
+
+	C = alloc_fence();
+	if (!C) {
+		/* Do not hand a NULL fence to the await calls below */
+		ret = -ENOMEM;
+		goto err_B;
+	}
+
+	if (i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL) == -EINVAL) {
+		pr_err("invalid cycle detected\n");
+		goto err_C;
+	}
+	if (i915_sw_fence_await_sw_fence_gfp(C, B, GFP_KERNEL) != -EINVAL) {
+		pr_err("single depth cycle not detected (CBC)\n");
+		goto err_C;
+	}
+	if (i915_sw_fence_await_sw_fence_gfp(C, A, GFP_KERNEL) != -EINVAL) {
+		pr_err("cycle not detected (BA, CB, AC)\n");
+		goto err_C;
+	}
+	if (i915_sw_fence_await_sw_fence_gfp(A, C, GFP_KERNEL) == -EINVAL) {
+		pr_err("invalid cycle detected\n");
+		goto err_C;
+	}
+
+	i915_sw_fence_complete(A);
+	i915_sw_fence_complete(B);
+	i915_sw_fence_complete(C);
+
+	ret = 0;
+	if (!i915_sw_fence_done(C)) {
+		pr_err("fence C not done\n");
+		ret = -EINVAL;
+	}
+	if (!i915_sw_fence_done(B)) {
+		pr_err("fence B not done\n");
+		ret = -EINVAL;
+	}
+	if (!i915_sw_fence_done(A)) {
+		pr_err("fence A not done\n");
+		ret = -EINVAL;
+	}
+err_C:
+	free_fence(C);
+err_B:
+	free_fence(B);
+err_A:
+	free_fence(A);
+	return ret;
+}
+
+/*
+ * Check a single dependency: A awaits B, so completing A alone must not
+ * signal it; once B is completed both fences must report done.
+ *
+ * Returns 0 on success, -ENOMEM if a fence cannot be allocated, a negative
+ * error from the await call, or -EINVAL if a check fails. @arg is unused.
+ */
+static int test_AB(void *arg)
+{
+	struct i915_sw_fence *A, *B;
+	int ret;
+
+	/* Test i915_sw_fence (A) waiting on an event source (B) */
+	A = alloc_fence();
+	if (!A)
+		return -ENOMEM;
+	B = alloc_fence();
+	if (!B) {
+		ret = -ENOMEM;
+		goto err_A;
+	}
+
+	ret = i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL);
+	if (ret < 0)
+		goto err_B;
+	if (ret == 0) {
+		pr_err("Incorrectly reported fence A was complete before await\n");
+		ret = -EINVAL;
+		goto err_B;
+	}
+
+	/* Every check below fails with -EINVAL until ret is cleared */
+	ret = -EINVAL;
+	i915_sw_fence_complete(A);
+	if (i915_sw_fence_done(A))
+		goto err_B;
+
+	i915_sw_fence_complete(B);
+	if (!i915_sw_fence_done(B)) {
+		pr_err("Fence B is not done\n");
+		goto err_B;
+	}
+
+	if (!i915_sw_fence_done(A)) {
+		pr_err("Fence A is not done\n");
+		goto err_B;
+	}
+
+	ret = 0;
+err_B:
+	free_fence(B);
+err_A:
+	free_fence(A);
+	return ret;
+}
+
+/*
+ * Check a three-deep chain: A awaits B and B awaits C. Neither A nor B may
+ * report done until C is completed, after which all three must be done.
+ *
+ * Returns 0 on success, -ENOMEM if a fence cannot be allocated, a negative
+ * error from an await call, or -EINVAL if a check fails. @arg is unused.
+ */
+static int test_ABC(void *arg)
+{
+	struct i915_sw_fence *A, *B, *C;
+	int ret;
+
+	/* Test a chain of fences, A waits on B who waits on C */
+	A = alloc_fence();
+	if (!A)
+		return -ENOMEM;
+
+	B = alloc_fence();
+	if (!B) {
+		ret = -ENOMEM;
+		goto err_A;
+	}
+
+	C = alloc_fence();
+	if (!C) {
+		ret = -ENOMEM;
+		goto err_B;
+	}
+
+	ret = i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL);
+	if (ret < 0)
+		goto err_C;
+	if (ret == 0) {
+		pr_err("Incorrectly reported fence B was complete before await\n");
+		/* ret is 0 here; without this the failure reads as a pass */
+		ret = -EINVAL;
+		goto err_C;
+	}
+
+	ret = i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL);
+	if (ret < 0)
+		goto err_C;
+	if (ret == 0) {
+		pr_err("Incorrectly reported fence C was complete before await\n");
+		ret = -EINVAL;
+		goto err_C;
+	}
+
+	ret = -EINVAL;
+	i915_sw_fence_complete(A);
+	if (i915_sw_fence_done(A)) {
+		pr_err("Fence A completed early\n");
+		goto err_C;
+	}
+
+	i915_sw_fence_complete(B);
+	if (i915_sw_fence_done(B)) {
+		pr_err("Fence B completed early\n");
+		goto err_C;
+	}
+
+	if (i915_sw_fence_done(A)) {
+		pr_err("Fence A completed early (after signaling B)\n");
+		goto err_C;
+	}
+
+	i915_sw_fence_complete(C);
+
+	ret = 0;
+	if (!i915_sw_fence_done(C)) {
+		pr_err("Fence C not done\n");
+		ret = -EINVAL;
+	}
+	if (!i915_sw_fence_done(B)) {
+		pr_err("Fence B not done\n");
+		ret = -EINVAL;
+	}
+	if (!i915_sw_fence_done(A)) {
+		pr_err("Fence A not done\n");
+		ret = -EINVAL;
+	}
+err_C:
+	free_fence(C);
+err_B:
+	free_fence(B);
+err_A:
+	free_fence(A);
+	return ret;
+}
+
+/*
+ * Check fan-out: both A and B await the single fence C. Neither may report
+ * done until C is completed, after which all three must be done.
+ *
+ * Returns 0 on success, -ENOMEM if a fence cannot be allocated, a negative
+ * error from an await call, or -EINVAL if a check fails. @arg is unused.
+ */
+static int test_AB_C(void *arg)
+{
+	struct i915_sw_fence *A, *B, *C;
+	int ret = -EINVAL;
+
+	/* Test multiple fences (AB) waiting on a single event (C) */
+	A = alloc_fence();
+	if (!A)
+		return -ENOMEM;
+
+	B = alloc_fence();
+	if (!B) {
+		ret = -ENOMEM;
+		goto err_A;
+	}
+
+	C = alloc_fence();
+	if (!C) {
+		/* Check the fence just allocated, not B again */
+		ret = -ENOMEM;
+		goto err_B;
+	}
+
+	ret = i915_sw_fence_await_sw_fence_gfp(A, C, GFP_KERNEL);
+	if (ret < 0)
+		goto err_C;
+	if (ret == 0) {
+		ret = -EINVAL;
+		goto err_C;
+	}
+
+	ret = i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL);
+	if (ret < 0)
+		goto err_C;
+	if (ret == 0) {
+		ret = -EINVAL;
+		goto err_C;
+	}
+
+	i915_sw_fence_complete(A);
+	i915_sw_fence_complete(B);
+
+	ret = 0;
+	if (i915_sw_fence_done(A)) {
+		pr_err("Fence A completed early\n");
+		ret = -EINVAL;
+	}
+
+	if (i915_sw_fence_done(B)) {
+		pr_err("Fence B completed early\n");
+		ret = -EINVAL;
+	}
+
+	i915_sw_fence_complete(C);
+	if (!i915_sw_fence_done(C)) {
+		pr_err("Fence C not done\n");
+		ret = -EINVAL;
+	}
+
+	if (!i915_sw_fence_done(B)) {
+		pr_err("Fence B not done\n");
+		ret = -EINVAL;
+	}
+
+	if (!i915_sw_fence_done(A)) {
+		pr_err("Fence A not done\n");
+		ret = -EINVAL;
+	}
+
+err_C:
+	free_fence(C);
+err_B:
+	free_fence(B);
+err_A:
+	free_fence(A);
+	return ret;
+}
+
+/*
+ * Check fan-in: the single fence C awaits both A and B. C must not report
+ * done when completed on its own; once A and B are completed all three
+ * must be done.
+ *
+ * Returns 0 on success, -ENOMEM if a fence cannot be allocated, a negative
+ * error from an await call, or -EINVAL if a check fails. @arg is unused.
+ */
+static int test_C_AB(void *arg)
+{
+	struct i915_sw_fence *A, *B, *C;
+	int ret;
+
+	/* Test multiple event sources (A,B) for a single fence (C) */
+	A = alloc_fence();
+	if (!A)
+		return -ENOMEM;
+
+	B = alloc_fence();
+	if (!B) {
+		ret = -ENOMEM;
+		goto err_A;
+	}
+
+	C = alloc_fence();
+	if (!C) {
+		/* Check the fence just allocated, not B again */
+		ret = -ENOMEM;
+		goto err_B;
+	}
+
+	ret = i915_sw_fence_await_sw_fence_gfp(C, A, GFP_KERNEL);
+	if (ret < 0)
+		goto err_C;
+	if (ret == 0) {
+		ret = -EINVAL;
+		goto err_C;
+	}
+
+	ret = i915_sw_fence_await_sw_fence_gfp(C, B, GFP_KERNEL);
+	if (ret < 0)
+		goto err_C;
+	if (ret == 0) {
+		ret = -EINVAL;
+		goto err_C;
+	}
+
+	ret = 0;
+	i915_sw_fence_complete(C);
+	if (i915_sw_fence_done(C))
+		ret = -EINVAL;
+
+	i915_sw_fence_complete(A);
+	i915_sw_fence_complete(B);
+
+	if (!i915_sw_fence_done(A)) {
+		pr_err("Fence A not done\n");
+		ret = -EINVAL;
+	}
+
+	if (!i915_sw_fence_done(B)) {
+		pr_err("Fence B not done\n");
+		ret = -EINVAL;
+	}
+
+	if (!i915_sw_fence_done(C)) {
+		pr_err("Fence C not done\n");
+		ret = -EINVAL;
+	}
+
+err_C:
+	free_fence(C);
+err_B:
+	free_fence(B);
+err_A:
+	free_fence(A);
+	return ret;
+}
+
+/*
+ * Build a chain of 4096 fences in which fences[i] awaits fences[i - 1],
+ * complete them from the tail towards the head, and check that none
+ * reports done until fences[0] is completed.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails, a negative error
+ * from an await call, or -EINVAL if a check fails. @arg is unused.
+ */
+static int test_chain(void *arg)
+{
+	int nfences = 4096;
+	struct i915_sw_fence **fences;
+	int ret, i;
+
+	/* Test a long chain of fences */
+	fences = kmalloc_array(nfences, sizeof(*fences), GFP_KERNEL);
+	if (!fences)
+		return -ENOMEM;
+
+	for (i = 0; i < nfences; i++) {
+		fences[i] = alloc_fence();
+		if (!fences[i]) {
+			/* Only fences[0..i-1] exist; trim the cleanup loop */
+			nfences = i;
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		if (i > 0) {
+			ret = i915_sw_fence_await_sw_fence_gfp(fences[i],
+							       fences[i - 1],
+							       GFP_KERNEL);
+			if (ret < 0) {
+				/* fences[i] was allocated and must be freed */
+				nfences = i + 1;
+				goto err;
+			}
+		}
+	}
+
+	ret = 0;
+	/* Walk from the last fence down to index 1; fences[0] is left */
+	for (i = nfences; --i; ) {
+		i915_sw_fence_complete(fences[i]);
+		if (i915_sw_fence_done(fences[i])) {
+			/* Report only the first early completion */
+			if (ret == 0)
+				pr_err("Fence[%d] completed early\n", i);
+			ret = -EINVAL;
+		}
+	}
+	i915_sw_fence_complete(fences[0]);
+
+	for (i = 0; ret == 0 && i < nfences; i++) {
+		if (!i915_sw_fence_done(fences[i])) {
+			pr_err("Fence[%d] is not done\n", i);
+			ret = -EINVAL;
+		}
+	}
+
+err:
+	for (i = 0; i < nfences; i++)
+		free_fence(fences[i]);
+	kfree(fences);
+	return ret;
+}
+
+/* State shared between test_ipc() and its worker, task_ipc(). */
+struct task_ipc {
+	struct work_struct work;	/* work item that runs task_ipc() */
+	struct completion started;	/* completed once the worker is running */
+	struct i915_sw_fence *in, *out;	/* worker waits on in, completes out */
+	int value;			/* set to 1 by the worker after in */
+};
+
+/*
+ * Worker for test_ipc(): announce that it has started, wait on the in
+ * fence, publish value = 1, then complete the out fence.
+ */
+static void task_ipc(struct work_struct *work)
+{
+	struct task_ipc *ipc = container_of(work, typeof(*ipc), work);
+
+	complete(&ipc->started);
+
+	i915_sw_fence_wait(ipc->in);
+	smp_store_mb(ipc->value, 1);
+	i915_sw_fence_complete(ipc->out);
+}
+
+/*
+ * Use a pair of fences to hand control to a worker and back: the worker
+ * must not post ipc.value before ipc.in is signaled, and must have posted
+ * it by the time ipc.out is signaled.
+ *
+ * Returns 0 on success, -ENOMEM if a fence cannot be allocated, or -EINVAL
+ * if either ordering check fails. @arg is unused.
+ */
+static int test_ipc(void *arg)
+{
+	struct task_ipc ipc;
+	int ret = 0;
+
+	/* Test use of i915_sw_fence as an interprocess signaling mechanism */
+	ipc.in = alloc_fence();
+	if (!ipc.in)
+		return -ENOMEM;
+	ipc.out = alloc_fence();
+	if (!ipc.out) {
+		ret = -ENOMEM;
+		goto err_in;
+	}
+
+	/* use a completion to avoid chicken-and-egg testing */
+	init_completion(&ipc.started);
+
+	ipc.value = 0;
+	/* ipc lives on this stack frame, so use the on-stack work helpers */
+	INIT_WORK_ONSTACK(&ipc.work, task_ipc);
+	schedule_work(&ipc.work);
+
+	wait_for_completion(&ipc.started);
+
+	/* Give the worker a chance to run ahead of the fence, if it can */
+	usleep_range(1000, 2000);
+	if (READ_ONCE(ipc.value)) {
+		pr_err("worker updated value before i915_sw_fence was signaled\n");
+		ret = -EINVAL;
+	}
+
+	i915_sw_fence_complete(ipc.in);
+	i915_sw_fence_wait(ipc.out);
+
+	if (!READ_ONCE(ipc.value)) {
+		pr_err("worker signaled i915_sw_fence before value was posted\n");
+		ret = -EINVAL;
+	}
+
+	/* The worker must be finished before ipc goes out of scope */
+	flush_work(&ipc.work);
+	destroy_work_on_stack(&ipc.work);
+	free_fence(ipc.out);
+err_in:
+	free_fence(ipc.in);
+	return ret;
+}
+
+/*
+ * Entry point named by the selftest() entry in i915_mock_selftests.h.
+ * Passes the i915_sw_fence subtests listed below to i915_subtests() with a
+ * NULL argument and returns its result.
+ */
+int i915_sw_fence_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(test_self),
+		SUBTEST(test_dag),
+		SUBTEST(test_AB),
+		SUBTEST(test_ABC),
+		SUBTEST(test_AB_C),
+		SUBTEST(test_C_AB),
+		SUBTEST(test_chain),
+		SUBTEST(test_ipc),
+	};
+
+	return i915_subtests(tests, NULL);
+}
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 03/12] drm/i915: Make ptr_unpack_bits() more function-like
  2017-05-11 19:59 [PATCH 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
  2017-05-11 19:59 ` [PATCH 02/12] drm/i915: Import the kfence selftests for i915_sw_fence Chris Wilson
@ 2017-05-11 19:59 ` Chris Wilson
  2017-05-11 19:59 ` [PATCH 04/12] drm/i915: Redefine ptr_pack_bits() and friends Chris Wilson
                   ` (13 subsequent siblings)
  15 siblings, 0 replies; 36+ messages in thread
From: Chris Wilson @ 2017-05-11 19:59 UTC (permalink / raw)
  To: intel-gfx

ptr_unpack_bits() is a function-like macro, as such it is meant to be
replaceable by a function. In this case, we should be passing in the
out-param as a pointer.

Bizarrely this does affect code generation:

function                                     old     new   delta
i915_gem_object_pin_map                      409     389     -20

An improvement(?) in this case, but one can't help but wonder what
strict-aliasing optimisations we are preventing.

The generated code looks identical in using ptr_unpack_bits (no extra
motions to stack, the pointer and bits appear to be kept in registers),
the difference appears to be code ordering and with a reorder it is able
to use smaller forward jumps.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c   | 2 +-
 drivers/gpu/drm/i915/i915_utils.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f9c6b9b5002c..5d68bece2ac3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2612,7 +2612,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 	}
 	GEM_BUG_ON(!obj->mm.pages);
 
-	ptr = ptr_unpack_bits(obj->mm.mapping, has_type);
+	ptr = ptr_unpack_bits(obj->mm.mapping, &has_type);
 	if (ptr && has_type != type) {
 		if (pinned) {
 			ret = -EBUSY;
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index f9d6607ef52f..18630d8f4be8 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -77,7 +77,7 @@
 
 #define ptr_unpack_bits(ptr, bits) ({					\
 	unsigned long __v = (unsigned long)(ptr);			\
-	(bits) = __v & ~PAGE_MASK;					\
+	*(bits) = __v & ~PAGE_MASK;					\
 	(typeof(ptr))(__v & PAGE_MASK);					\
 })
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 04/12] drm/i915: Redefine ptr_pack_bits() and friends
  2017-05-11 19:59 [PATCH 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
  2017-05-11 19:59 ` [PATCH 02/12] drm/i915: Import the kfence selftests for i915_sw_fence Chris Wilson
  2017-05-11 19:59 ` [PATCH 03/12] drm/i915: Make ptr_unpack_bits() more function-like Chris Wilson
@ 2017-05-11 19:59 ` Chris Wilson
  2017-05-11 19:59 ` [PATCH 05/12] drm/i915/execlists: Pack the count into the low bits of the port.request Chris Wilson
                   ` (12 subsequent siblings)
  15 siblings, 0 replies; 36+ messages in thread
From: Chris Wilson @ 2017-05-11 19:59 UTC (permalink / raw)
  To: intel-gfx

Rebrand the current (pointer | bits) pack/unpack utility macros as
explicit bit twiddling for PAGE_SIZE so that we can use the more
flexible underlying macros for different bits.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c |  2 +-
 drivers/gpu/drm/i915/i915_gem.c        |  6 +++---
 drivers/gpu/drm/i915/i915_utils.h      | 19 +++++++++++++------
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 2a1a3347495a..f0cb22cc0dd6 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1284,7 +1284,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
 
 		if (*cmd == MI_BATCH_BUFFER_END) {
 			if (needs_clflush_after) {
-				void *ptr = ptr_mask_bits(shadow_batch_obj->mm.mapping);
+				void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
 				drm_clflush_virt_range(ptr,
 						       (void *)(cmd + 1) - ptr);
 			}
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5d68bece2ac3..b63c3d1ccd9f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2280,7 +2280,7 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
 	if (obj->mm.mapping) {
 		void *ptr;
 
-		ptr = ptr_mask_bits(obj->mm.mapping);
+		ptr = page_mask_bits(obj->mm.mapping);
 		if (is_vmalloc_addr(ptr))
 			vunmap(ptr);
 		else
@@ -2612,7 +2612,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 	}
 	GEM_BUG_ON(!obj->mm.pages);
 
-	ptr = ptr_unpack_bits(obj->mm.mapping, &has_type);
+	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
 	if (ptr && has_type != type) {
 		if (pinned) {
 			ret = -EBUSY;
@@ -2634,7 +2634,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 			goto err_unpin;
 		}
 
-		obj->mm.mapping = ptr_pack_bits(ptr, type);
+		obj->mm.mapping = page_pack_bits(ptr, type);
 	}
 
 out_unlock:
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 18630d8f4be8..d9df23795f9a 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -70,20 +70,27 @@
 #define overflows_type(x, T) \
 	(sizeof(x) > sizeof(T) && (x) >> (sizeof(T) * BITS_PER_BYTE))
 
-#define ptr_mask_bits(ptr) ({						\
+#define ptr_mask_bits(ptr, n) ({					\
 	unsigned long __v = (unsigned long)(ptr);			\
-	(typeof(ptr))(__v & PAGE_MASK);					\
+	(typeof(ptr))(__v & -BIT(n));					\
 })
 
-#define ptr_unpack_bits(ptr, bits) ({					\
+#define ptr_unmask_bits(ptr, n) ((unsigned long)(ptr) & (BIT(n) - 1))
+
+#define ptr_unpack_bits(ptr, bits, n) ({				\
 	unsigned long __v = (unsigned long)(ptr);			\
-	*(bits) = __v & ~PAGE_MASK;					\
-	(typeof(ptr))(__v & PAGE_MASK);					\
+	*(bits) = __v & (BIT(n) - 1);					\
+	(typeof(ptr))(__v & -BIT(n));					\
 })
 
-#define ptr_pack_bits(ptr, bits)					\
+#define ptr_pack_bits(ptr, bits, n)					\
 	((typeof(ptr))((unsigned long)(ptr) | (bits)))
 
+#define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
+#define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
+#define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT)
+#define page_unpack_bits(ptr, bits) ptr_unpack_bits(ptr, bits, PAGE_SHIFT)
+
 #define ptr_offset(ptr, member) offsetof(typeof(*(ptr)), member)
 
 #define fetch_and_zero(ptr) ({						\
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 05/12] drm/i915/execlists: Pack the count into the low bits of the port.request
  2017-05-11 19:59 [PATCH 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (2 preceding siblings ...)
  2017-05-11 19:59 ` [PATCH 04/12] drm/i915: Redefine ptr_pack_bits() and friends Chris Wilson
@ 2017-05-11 19:59 ` Chris Wilson
  2017-05-11 19:59 ` [PATCH 06/12] drm/i915: Don't mark an execlists context-switch when idle Chris Wilson
                   ` (11 subsequent siblings)
  15 siblings, 0 replies; 36+ messages in thread
From: Chris Wilson @ 2017-05-11 19:59 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

add/remove: 1/1 grow/shrink: 5/4 up/down: 391/-578 (-187)
function                                     old     new   delta
execlists_submit_ports                       262     471    +209
port_assign.isra                               -     136    +136
capture                                     6344    6359     +15
reset_common_ring                            438     452     +14
execlists_submit_request                     228     238     +10
gen8_init_common_ring                        334     341      +7
intel_engine_is_idle                         106     105      -1
i915_engine_info                            2314    2290     -24
__i915_gem_set_wedged_BKL                    485     411     -74
intel_lrc_irq_handler                       1789    1604    -185
execlists_update_context                     294       -    -294

The most important change there is the improvement to
intel_lrc_irq_handler and execlists_submit_ports (net improvement since
execlists_update_context is now inlined).

v2: Use the port_api() for guc as well (even though currently we do not
pack any counters in there, yet) and hide all port->request_count inside
the helpers.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  32 ++++----
 drivers/gpu/drm/i915/i915_gem.c            |   6 +-
 drivers/gpu/drm/i915/i915_gpu_error.c      |  13 +++-
 drivers/gpu/drm/i915/i915_guc_submission.c |  32 +++++---
 drivers/gpu/drm/i915/intel_engine_cs.c     |   2 +-
 drivers/gpu/drm/i915/intel_lrc.c           | 119 ++++++++++++++++-------------
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  10 ++-
 7 files changed, 123 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index bd9abef40c66..9bc86d46a656 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3353,6 +3353,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 		if (i915.enable_execlists) {
 			u32 ptr, read, write;
 			struct rb_node *rb;
+			unsigned int idx;
 
 			seq_printf(m, "\tExeclist status: 0x%08x %08x\n",
 				   I915_READ(RING_EXECLIST_STATUS_LO(engine)),
@@ -3370,8 +3371,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 			if (read > write)
 				write += GEN8_CSB_ENTRIES;
 			while (read < write) {
-				unsigned int idx = ++read % GEN8_CSB_ENTRIES;
-
+				idx = ++read % GEN8_CSB_ENTRIES;
 				seq_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
 					   idx,
 					   I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)),
@@ -3379,21 +3379,19 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 			}
 
 			rcu_read_lock();
-			rq = READ_ONCE(engine->execlist_port[0].request);
-			if (rq) {
-				seq_printf(m, "\t\tELSP[0] count=%d, ",
-					   engine->execlist_port[0].count);
-				print_request(m, rq, "rq: ");
-			} else {
-				seq_printf(m, "\t\tELSP[0] idle\n");
-			}
-			rq = READ_ONCE(engine->execlist_port[1].request);
-			if (rq) {
-				seq_printf(m, "\t\tELSP[1] count=%d, ",
-					   engine->execlist_port[1].count);
-				print_request(m, rq, "rq: ");
-			} else {
-				seq_printf(m, "\t\tELSP[1] idle\n");
+			for (idx = 0; idx < ARRAY_SIZE(engine->execlist_port); idx++) {
+				unsigned int count;
+
+				rq = port_unpack(&engine->execlist_port[idx],
+						 &count);
+				if (rq) {
+					seq_printf(m, "\t\tELSP[%d] count=%d, ",
+						   idx, count);
+					print_request(m, rq, "rq: ");
+				} else {
+					seq_printf(m, "\t\tELSP[%d] idle\n",
+						   idx);
+				}
 			}
 			rcu_read_unlock();
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b63c3d1ccd9f..75d7575b81f4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3019,12 +3019,14 @@ static void engine_set_wedged(struct intel_engine_cs *engine)
 	 */
 
 	if (i915.enable_execlists) {
+		struct execlist_port *port = engine->execlist_port;
 		unsigned long flags;
+		unsigned int n;
 
 		spin_lock_irqsave(&engine->timeline->lock, flags);
 
-		i915_gem_request_put(engine->execlist_port[0].request);
-		i915_gem_request_put(engine->execlist_port[1].request);
+		for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
+			i915_gem_request_put(port_request(&port[n]));
 		memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
 		engine->execlist_queue = RB_ROOT;
 		engine->execlist_first = NULL;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index ec526d92f908..e18f350bc364 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1324,12 +1324,17 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 static void error_record_engine_execlists(struct intel_engine_cs *engine,
 					  struct drm_i915_error_engine *ee)
 {
+	const struct execlist_port *port = engine->execlist_port;
 	unsigned int n;
 
-	for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
-		if (engine->execlist_port[n].request)
-			record_request(engine->execlist_port[n].request,
-				       &ee->execlist[n]);
+	for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
+		struct drm_i915_gem_request *rq = port_request(&port[n]);
+
+		if (!rq)
+			break;
+
+		record_request(rq, &ee->execlist[n]);
+	}
 }
 
 static void record_context(struct drm_i915_error_context *e,
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 7e85b5ab8ae2..f069c1faf9a9 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -653,10 +653,22 @@ static void nested_enable_signaling(struct drm_i915_gem_request *rq)
 	spin_unlock(&rq->lock);
 }
 
+static void port_assign(struct execlist_port *port,
+			struct drm_i915_gem_request *rq)
+{
+	GEM_BUG_ON(rq == port_request(port));
+
+	if (port_isset(port))
+		i915_gem_request_put(port_request(port));
+
+	port_set(port, i915_gem_request_get(rq));
+	nested_enable_signaling(rq);
+}
+
 static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 {
 	struct execlist_port *port = engine->execlist_port;
-	struct drm_i915_gem_request *last = port[0].request;
+	struct drm_i915_gem_request *last = port_request(port);
 	struct rb_node *rb;
 	bool submit = false;
 
@@ -670,8 +682,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 			if (port != engine->execlist_port)
 				break;
 
-			i915_gem_request_assign(&port->request, last);
-			nested_enable_signaling(last);
+			port_assign(port, last);
 			port++;
 		}
 
@@ -686,8 +697,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 		submit = true;
 	}
 	if (submit) {
-		i915_gem_request_assign(&port->request, last);
-		nested_enable_signaling(last);
+		port_assign(port, last);
 		engine->execlist_first = rb;
 	}
 	spin_unlock_irq(&engine->timeline->lock);
@@ -703,17 +713,19 @@ static void i915_guc_irq_handler(unsigned long data)
 	bool submit;
 
 	do {
-		rq = port[0].request;
+		rq = port_request(&port[0]);
 		while (rq && i915_gem_request_completed(rq)) {
 			trace_i915_gem_request_out(rq);
 			i915_gem_request_put(rq);
-			port[0].request = port[1].request;
-			port[1].request = NULL;
-			rq = port[0].request;
+
+			port[0] = port[1];
+			memset(&port[1], 0, sizeof(port[1]));
+
+			rq = port_request(&port[0]);
 		}
 
 		submit = false;
-		if (!port[1].request)
+		if (!port_count(&port[1]))
 			submit = i915_guc_dequeue(engine);
 	} while (submit);
 }
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 483ed7635692..c89f0e2030da 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1231,7 +1231,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 		return false;
 
 	/* Both ports drained, no more ELSP submission? */
-	if (engine->execlist_port[0].request)
+	if (port_request(&engine->execlist_port[0]))
 		return false;
 
 	/* Ring stopped? */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 319d9a8f37ca..9d789ade1308 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -337,39 +337,32 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 
 static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
 	struct execlist_port *port = engine->execlist_port;
 	u32 __iomem *elsp =
-		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
-	u64 desc[2];
-
-	GEM_BUG_ON(port[0].count > 1);
-	if (!port[0].count)
-		execlists_context_status_change(port[0].request,
-						INTEL_CONTEXT_SCHEDULE_IN);
-	desc[0] = execlists_update_context(port[0].request);
-	GEM_DEBUG_EXEC(port[0].context_id = upper_32_bits(desc[0]));
-	port[0].count++;
-
-	if (port[1].request) {
-		GEM_BUG_ON(port[1].count);
-		execlists_context_status_change(port[1].request,
-						INTEL_CONTEXT_SCHEDULE_IN);
-		desc[1] = execlists_update_context(port[1].request);
-		GEM_DEBUG_EXEC(port[1].context_id = upper_32_bits(desc[1]));
-		port[1].count = 1;
-	} else {
-		desc[1] = 0;
-	}
-	GEM_BUG_ON(desc[0] == desc[1]);
+		engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+	unsigned int n;
 
-	/* You must always write both descriptors in the order below. */
-	writel(upper_32_bits(desc[1]), elsp);
-	writel(lower_32_bits(desc[1]), elsp);
+	for (n = ARRAY_SIZE(engine->execlist_port); n--; ) {
+		struct drm_i915_gem_request *rq;
+		unsigned int count;
+		u64 desc;
+
+		rq = port_unpack(&port[n], &count);
+		if (rq) {
+			GEM_BUG_ON(count > !n);
+			if (!count++)
+				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+			port_set(&port[n], port_pack(rq, count));
+			desc = execlists_update_context(rq);
+			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
+		} else {
+			GEM_BUG_ON(!n);
+			desc = 0;
+		}
 
-	writel(upper_32_bits(desc[0]), elsp);
-	/* The context is automatically loaded after the following */
-	writel(lower_32_bits(desc[0]), elsp);
+		writel(upper_32_bits(desc), elsp);
+		writel(lower_32_bits(desc), elsp);
+	}
 }
 
 static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
@@ -390,6 +383,17 @@ static bool can_merge_ctx(const struct i915_gem_context *prev,
 	return true;
 }
 
+static void port_assign(struct execlist_port *port,
+			struct drm_i915_gem_request *rq)
+{
+	GEM_BUG_ON(rq == port_request(port));
+
+	if (port_isset(port))
+		i915_gem_request_put(port_request(port));
+
+	port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *last;
@@ -397,7 +401,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	struct rb_node *rb;
 	bool submit = false;
 
-	last = port->request;
+	last = port_request(port);
 	if (last)
 		/* WaIdleLiteRestore:bdw,skl
 		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
@@ -407,7 +411,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		 */
 		last->tail = last->wa_tail;
 
-	GEM_BUG_ON(port[1].request);
+	GEM_BUG_ON(port_isset(&port[1]));
 
 	/* Hardware submission is through 2 ports. Conceptually each port
 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
@@ -464,7 +468,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 			GEM_BUG_ON(last->ctx == cursor->ctx);
 
-			i915_gem_request_assign(&port->request, last);
+			if (submit)
+				port_assign(port, last);
 			port++;
 		}
 
@@ -479,7 +484,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		submit = true;
 	}
 	if (submit) {
-		i915_gem_request_assign(&port->request, last);
+		port_assign(port, last);
 		engine->execlist_first = rb;
 	}
 	spin_unlock_irq(&engine->timeline->lock);
@@ -488,16 +493,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		execlists_submit_ports(engine);
 }
 
-static bool execlists_elsp_idle(struct intel_engine_cs *engine)
-{
-	return !engine->execlist_port[0].request;
-}
-
 static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
 {
 	const struct execlist_port *port = engine->execlist_port;
 
-	return port[0].count + port[1].count < 2;
+	return port_count(&port[0]) + port_count(&port[1]) < 2;
 }
 
 /*
@@ -547,7 +547,9 @@ static void intel_lrc_irq_handler(unsigned long data)
 		tail = GEN8_CSB_WRITE_PTR(head);
 		head = GEN8_CSB_READ_PTR(head);
 		while (head != tail) {
+			struct drm_i915_gem_request *rq;
 			unsigned int status;
+			unsigned int count;
 
 			if (++head == GEN8_CSB_ENTRIES)
 				head = 0;
@@ -575,22 +577,26 @@ static void intel_lrc_irq_handler(unsigned long data)
 
 			/* Check the context/desc id for this event matches */
 			GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
-					 port[0].context_id);
+					 port->context_id);
 
-			GEM_BUG_ON(port[0].count == 0);
-			if (--port[0].count == 0) {
+			rq = port_unpack(port, &count);
+			GEM_BUG_ON(count == 0);
+			if (--count == 0) {
 				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
-				GEM_BUG_ON(!i915_gem_request_completed(port[0].request));
-				execlists_context_status_change(port[0].request,
-								INTEL_CONTEXT_SCHEDULE_OUT);
+				GEM_BUG_ON(!i915_gem_request_completed(rq));
+				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+
+				trace_i915_gem_request_out(rq);
+				i915_gem_request_put(rq);
 
-				trace_i915_gem_request_out(port[0].request);
-				i915_gem_request_put(port[0].request);
 				port[0] = port[1];
 				memset(&port[1], 0, sizeof(port[1]));
+			} else {
+				port_set(port, port_pack(rq, count));
 			}
 
-			GEM_BUG_ON(port[0].count == 0 &&
+			/* After the final element, the hw should be idle */
+			GEM_BUG_ON(port_count(port) == 0 &&
 				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
 		}
 
@@ -1147,6 +1153,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 	struct drm_i915_private *dev_priv = engine->i915;
 	struct execlist_port *port = engine->execlist_port;
 	unsigned int n;
+	bool submit;
 	int ret;
 
 	ret = intel_mocs_init_engine(engine);
@@ -1168,19 +1175,21 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 	/* After a GPU reset, we may have requests to replay */
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 
+	submit = false;
 	for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
-		if (!port[n].request)
+		if (!port_isset(&port[n]))
 			break;
 
 		DRM_DEBUG_DRIVER("Restarting %s:%d from 0x%x\n",
 				 engine->name, n,
-				 port[n].request->global_seqno);
+				 port_request(&port[n])->global_seqno);
 
 		/* Discard the current inflight count */
-		port[n].count = 0;
+		port_set(&port[n], port_request(&port[n]));
+		submit = true;
 	}
 
-	if (!i915.enable_guc_submission && !execlists_elsp_idle(engine))
+	if (submit && !i915.enable_guc_submission)
 		execlists_submit_ports(engine);
 
 	return 0;
@@ -1258,13 +1267,13 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	intel_ring_update_space(request->ring);
 
 	/* Catch up with any missed context-switch interrupts */
-	if (request->ctx != port[0].request->ctx) {
-		i915_gem_request_put(port[0].request);
+	if (request->ctx != port_request(port)->ctx) {
+		i915_gem_request_put(port_request(port));
 		port[0] = port[1];
 		memset(&port[1], 0, sizeof(port[1]));
 	}
 
-	GEM_BUG_ON(request->ctx != port[0].request->ctx);
+	GEM_BUG_ON(request->ctx != port_request(port)->ctx);
 
 	/* Reset WaIdleLiteRestore:bdw,skl as well */
 	request->tail =
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index ec16fb6fde62..c380f3ac8a39 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -368,8 +368,14 @@ struct intel_engine_cs {
 	/* Execlists */
 	struct tasklet_struct irq_tasklet;
 	struct execlist_port {
-		struct drm_i915_gem_request *request;
-		unsigned int count;
+		struct drm_i915_gem_request *request_count;
+#define EXECLIST_COUNT_BITS 2
+#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
+#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
+#define port_set(p, packed) ((p)->request_count = (packed))
+#define port_isset(p) ((p)->request_count)
 		GEM_DEBUG_DECL(u32 context_id);
 	} execlist_port[2];
 	struct rb_root execlist_queue;
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 06/12] drm/i915: Don't mark an execlists context-switch when idle
  2017-05-11 19:59 [PATCH 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (3 preceding siblings ...)
  2017-05-11 19:59 ` [PATCH 05/12] drm/i915/execlists: Pack the count into the low bits of the port.request Chris Wilson
@ 2017-05-11 19:59 ` Chris Wilson
  2017-05-11 19:59 ` [PATCH 07/12] drm/i915: Use a define for the default priority [0] Chris Wilson
                   ` (10 subsequent siblings)
  15 siblings, 0 replies; 36+ messages in thread
From: Chris Wilson @ 2017-05-11 19:59 UTC (permalink / raw)
  To: intel-gfx

If we *know* that the engine is idle, i.e. we have no more contexts in
flight, we can skip any spurious CSB idle interrupts. These spurious
interrupts seem to arrive long after we assert that the engines are
completely idle, triggering later assertions:

[  178.896646] intel_engine_is_idle(bcs): interrupt not handled, irq_posted=2
[  178.896655] ------------[ cut here ]------------
[  178.896658] kernel BUG at drivers/gpu/drm/i915/intel_engine_cs.c:226!
[  178.896661] invalid opcode: 0000 [#1] SMP
[  178.896663] Modules linked in: i915(E) x86_pkg_temp_thermal(E) crct10dif_pclmul(E) crc32_pclmul(E) crc32c_intel(E) ghash_clmulni_intel(E) nls_ascii(E) nls_cp437(E) vfat(E) fat(E) intel_gtt(E) i2c_algo_bit(E) drm_kms_helper(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) fb_sys_fops(E) aesni_intel(E) prime_numbers(E) evdev(E) aes_x86_64(E) drm(E) crypto_simd(E) cryptd(E) glue_helper(E) mei_me(E) mei(E) lpc_ich(E) efivars(E) mfd_core(E) battery(E) video(E) acpi_pad(E) button(E) tpm_tis(E) tpm_tis_core(E) tpm(E) autofs4(E) i2c_i801(E) fan(E) thermal(E) i2c_designware_platform(E) i2c_designware_core(E)
[  178.896694] CPU: 1 PID: 522 Comm: gem_exec_whispe Tainted: G            E   4.11.0-rc5+ #14
[  178.896702] task: ffff88040aba8d40 task.stack: ffffc900003f0000
[  178.896722] RIP: 0010:intel_engine_init_global_seqno+0x1db/0x1f0 [i915]
[  178.896725] RSP: 0018:ffffc900003f3ab0 EFLAGS: 00010246
[  178.896728] RAX: 0000000000000000 RBX: ffff88040af54000 RCX: 0000000000000000
[  178.896731] RDX: ffff88041ec933e0 RSI: ffff88041ec8cc48 RDI: ffff88041ec8cc48
[  178.896734] RBP: ffffc900003f3ac8 R08: 0000000000000000 R09: 000000000000047d
[  178.896736] R10: 0000000000000040 R11: ffff88040b344f80 R12: 0000000000000000
[  178.896739] R13: ffff88040bce0000 R14: ffff88040bce52d8 R15: ffff88040bce0000
[  178.896742] FS:  00007f2cccc2d8c0(0000) GS:ffff88041ec80000(0000) knlGS:0000000000000000
[  178.896746] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  178.896749] CR2: 00007f41ddd8f000 CR3: 000000040bb03000 CR4: 00000000001406e0
[  178.896752] Call Trace:
[  178.896768]  reset_all_global_seqno.part.33+0x4e/0xd0 [i915]
[  178.896782]  i915_gem_request_alloc+0x304/0x330 [i915]
[  178.896795]  i915_gem_do_execbuffer+0x8a1/0x17d0 [i915]
[  178.896799]  ? remove_wait_queue+0x48/0x50
[  178.896812]  ? i915_wait_request+0x300/0x590 [i915]
[  178.896816]  ? wake_up_q+0x70/0x70
[  178.896819]  ? refcount_dec_and_test+0x11/0x20
[  178.896823]  ? reservation_object_add_excl_fence+0xa5/0x100
[  178.896835]  i915_gem_execbuffer2+0xab/0x1f0 [i915]
[  178.896844]  drm_ioctl+0x1e6/0x460 [drm]
[  178.896858]  ? i915_gem_execbuffer+0x260/0x260 [i915]
[  178.896862]  ? dput+0xcf/0x250
[  178.896866]  ? full_proxy_release+0x66/0x80
[  178.896869]  ? mntput+0x1f/0x30
[  178.896872]  do_vfs_ioctl+0x8f/0x5b0
[  178.896875]  ? ____fput+0x9/0x10
[  178.896878]  ? task_work_run+0x80/0xa0
[  178.896881]  SyS_ioctl+0x3c/0x70
[  178.896885]  entry_SYSCALL_64_fastpath+0x17/0x98
[  178.896888] RIP: 0033:0x7f2ccb455ca7
[  178.896890] RSP: 002b:00007ffcabec72d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[  178.896894] RAX: ffffffffffffffda RBX: 000055f897a44b90 RCX: 00007f2ccb455ca7
[  178.896897] RDX: 00007ffcabec74a0 RSI: 0000000040406469 RDI: 0000000000000003
[  178.896900] RBP: 00007f2ccb70a440 R08: 00007f2ccb70d0a4 R09: 0000000000000000
[  178.896903] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
[  178.896905] R13: 000055f89782d71a R14: 00007ffcabecf838 R15: 0000000000000003
[  178.896908] Code: 00 31 d2 4c 89 ef 8d 70 48 41 ff 95 f8 06 00 00 e9 68 fe ff ff be 0f 00 00 00 48 c7 c7 48 dc 37 a0 e8 fa 33 d6 e0 e9 0b ff ff ff <0f> 0b 0f 0b 0f 0b 0f 0b 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00

On the other hand, by ignoring the interrupt, do we risk running out of
space in the CSB ring? Testing for a few hours suggests not, i.e. that we
only seem to get the odd delayed CSB idle notification.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 9f5ae1e938be..927b408e6e95 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1323,8 +1323,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
 	bool tasklet = false;
 
 	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
-		set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-		tasklet = true;
+		if (port_count(&engine->execlist_port[0])) {
+			set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+			tasklet = true;
+		}
 	}
 
 	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 07/12] drm/i915: Use a define for the default priority [0]
  2017-05-11 19:59 [PATCH 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (4 preceding siblings ...)
  2017-05-11 19:59 ` [PATCH 06/12] drm/i915: Don't mark an execlists context-switch when idle Chris Wilson
@ 2017-05-11 19:59 ` Chris Wilson
  2017-05-12  8:02   ` Mika Kuoppala
  2017-05-11 19:59 ` [PATCH 08/12] drm/i915: Split execlist priority queue into rbtree + linked list Chris Wilson
                   ` (9 subsequent siblings)
  15 siblings, 1 reply; 36+ messages in thread
From: Chris Wilson @ 2017-05-11 19:59 UTC (permalink / raw)
  To: intel-gfx

Explicitly assign the default priority, and give it a name. After much
discussion, we have chosen to call it I915_PRIORITY_NORMAL!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 1 +
 drivers/gpu/drm/i915/i915_gem_request.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 31a73c39239f..c5d1666d7071 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -199,6 +199,7 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	kref_init(&ctx->ref);
 	list_add_tail(&ctx->link, &dev_priv->context_list);
 	ctx->i915 = dev_priv;
+	ctx->priority = I915_PRIORITY_NORMAL;
 
 	/* Default context will never have a file_priv */
 	ret = DEFAULT_CONTEXT_HANDLE;
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 4ccab5affd3c..0ecfc5e2d707 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -70,6 +70,7 @@ struct i915_priotree {
 	struct rb_node node;
 	int priority;
 #define I915_PRIORITY_MAX 1024
+#define I915_PRIORITY_NORMAL 0
 #define I915_PRIORITY_MIN (-I915_PRIORITY_MAX)
 };
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 08/12] drm/i915: Split execlist priority queue into rbtree + linked list
  2017-05-11 19:59 [PATCH 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (5 preceding siblings ...)
  2017-05-11 19:59 ` [PATCH 07/12] drm/i915: Use a define for the default priority [0] Chris Wilson
@ 2017-05-11 19:59 ` Chris Wilson
  2017-05-15 11:45   ` Tvrtko Ursulin
  2017-05-15 12:56   ` [PATCH v2] " Chris Wilson
  2017-05-11 19:59 ` [PATCH 09/12] drm/i915: Create a kmem_cache to allocate struct i915_priolist from Chris Wilson
                   ` (8 subsequent siblings)
  15 siblings, 2 replies; 36+ messages in thread
From: Chris Wilson @ 2017-05-11 19:59 UTC (permalink / raw)
  To: intel-gfx

All the requests at the same priority are executed in FIFO order. They
do not need to be stored in the rbtree themselves, as they are a simple
list within a level. If we move the requests at one priority into a list,
we can then reduce the rbtree to the set of priorities. This should keep
the height of the rbtree small, as the number of active priorities can not
exceed the number of active requests and should be typically only a few.

Currently, we have ~2k possible different priority levels, that may
increase to allow even more fine grained selection. Allocating those in
advance seems a waste (and may be impossible), so we opt for allocating
upon first use, and freeing after its requests are depleted. To avoid
the possibility of an allocation failure causing us to lose a request,
we preallocate the default priority (0) and bump any request to that
priority if we fail to allocate the appropriate priolist for it. Having a
request (that is ready to run, so not leading to corruption) execute
out-of-order is better than leaking the request (and its dependency
tree) entirely.

There should be a benefit to reducing execlists_dequeue() to principally
using a simple list (and reducing the frequency of both rbtree iteration
and balancing on erase) but for typical workloads, request coalescing
should be small enough that we don't notice any change. The main gain is
from improving PI calls to schedule, and the explicit list within a
level should make request unwinding simpler (we just need to insert at
the head of the list rather than the tail and not have to make the
rbtree search more complicated).

v2: Avoid use-after-free when deleting a depleted priolist

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  11 +-
 drivers/gpu/drm/i915/i915_gem_request.c    |   4 +-
 drivers/gpu/drm/i915/i915_gem_request.h    |   2 +-
 drivers/gpu/drm/i915/i915_guc_submission.c |  50 +++++----
 drivers/gpu/drm/i915/i915_utils.h          |   9 ++
 drivers/gpu/drm/i915/intel_lrc.c           | 171 ++++++++++++++++++-----------
 drivers/gpu/drm/i915/intel_ringbuffer.h    |   7 ++
 7 files changed, 165 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 9bc86d46a656..6d61a439efd0 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3352,7 +3352,6 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 
 		if (i915.enable_execlists) {
 			u32 ptr, read, write;
-			struct rb_node *rb;
 			unsigned int idx;
 
 			seq_printf(m, "\tExeclist status: 0x%08x %08x\n",
@@ -3396,9 +3395,13 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 			rcu_read_unlock();
 
 			spin_lock_irq(&engine->timeline->lock);
-			for (rb = engine->execlist_first; rb; rb = rb_next(rb)) {
-				rq = rb_entry(rb, typeof(*rq), priotree.node);
-				print_request(m, rq, "\t\tQ ");
+			for (rb = engine->execlist_first; rb; rb = rb_next(rb)){
+				struct i915_priolist *p =
+					rb_entry(rb, typeof(*p), node);
+
+				list_for_each_entry(rq, &p->requests,
+						    priotree.link)
+					print_request(m, rq, "\t\tQ ");
 			}
 			spin_unlock_irq(&engine->timeline->lock);
 		} else if (INTEL_GEN(dev_priv) > 6) {
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 10361c7e3b37..1ccf2522cdfd 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -159,7 +159,7 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt)
 {
 	struct i915_dependency *dep, *next;
 
-	GEM_BUG_ON(!RB_EMPTY_NODE(&pt->node));
+	GEM_BUG_ON(!list_empty(&pt->link));
 
 	/* Everyone we depended upon (the fences we wait to be signaled)
 	 * should retire before us and remove themselves from our list.
@@ -185,7 +185,7 @@ i915_priotree_init(struct i915_priotree *pt)
 {
 	INIT_LIST_HEAD(&pt->signalers_list);
 	INIT_LIST_HEAD(&pt->waiters_list);
-	RB_CLEAR_NODE(&pt->node);
+	INIT_LIST_HEAD(&pt->link);
 	pt->priority = INT_MIN;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 0ecfc5e2d707..8c508bd9088e 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -67,7 +67,7 @@ struct i915_dependency {
 struct i915_priotree {
 	struct list_head signalers_list; /* those before us, we depend upon */
 	struct list_head waiters_list; /* those after us, they depend upon us */
-	struct rb_node node;
+	struct list_head link;
 	int priority;
 #define I915_PRIORITY_MAX 1024
 #define I915_PRIORITY_NORMAL 0
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index f069c1faf9a9..440bab856cc2 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -674,32 +674,42 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 
 	spin_lock_irq(&engine->timeline->lock);
 	rb = engine->execlist_first;
+	GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
 	while (rb) {
-		struct drm_i915_gem_request *rq =
-			rb_entry(rb, typeof(*rq), priotree.node);
-
-		if (last && rq->ctx != last->ctx) {
-			if (port != engine->execlist_port)
-				break;
-
-			port_assign(port, last);
-			port++;
+		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+		struct drm_i915_gem_request *rq, *rn;
+
+		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+			if (last && rq->ctx != last->ctx) {
+				if (port != engine->execlist_port) {
+					__list_del_many(&p->requests,
+							&rq->priotree.link);
+					goto done;
+				}
+
+				port_assign(port, last);
+				port++;
+			}
+
+			INIT_LIST_HEAD(&rq->priotree.link);
+			rq->priotree.priority = INT_MAX;
+
+			i915_guc_submit(rq);
+			trace_i915_gem_request_in(rq, port - engine->execlist_port);
+			last = rq;
+			submit = true;
 		}
 
 		rb = rb_next(rb);
-		rb_erase(&rq->priotree.node, &engine->execlist_queue);
-		RB_CLEAR_NODE(&rq->priotree.node);
-		rq->priotree.priority = INT_MAX;
-
-		i915_guc_submit(rq);
-		trace_i915_gem_request_in(rq, port - engine->execlist_port);
-		last = rq;
-		submit = true;
+		rb_erase(&p->node, &engine->execlist_queue);
+		INIT_LIST_HEAD(&p->requests);
+		if (p->priority != I915_PRIORITY_NORMAL)
+			kfree(p);
 	}
-	if (submit) {
+done:
+	engine->execlist_first = rb;
+	if (submit)
 		port_assign(port, last);
-		engine->execlist_first = rb;
-	}
 	spin_unlock_irq(&engine->timeline->lock);
 
 	return submit;
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index d9df23795f9a..16ecd1ab108d 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -105,4 +105,13 @@
 	__idx;								\
 })
 
+#include <linux/list.h>
+
+static inline void __list_del_many(struct list_head *head,
+				   struct list_head *first)
+{
+	first->prev = head;
+	WRITE_ONCE(head->next, first);
+}
+
 #endif /* !__I915_UTILS_H */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 9d789ade1308..849b35796de7 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -436,57 +436,76 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 	spin_lock_irq(&engine->timeline->lock);
 	rb = engine->execlist_first;
+	GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
 	while (rb) {
-		struct drm_i915_gem_request *cursor =
-			rb_entry(rb, typeof(*cursor), priotree.node);
-
-		/* Can we combine this request with the current port? It has to
-		 * be the same context/ringbuffer and not have any exceptions
-		 * (e.g. GVT saying never to combine contexts).
-		 *
-		 * If we can combine the requests, we can execute both by
-		 * updating the RING_TAIL to point to the end of the second
-		 * request, and so we never need to tell the hardware about
-		 * the first.
-		 */
-		if (last && !can_merge_ctx(cursor->ctx, last->ctx)) {
-			/* If we are on the second port and cannot combine
-			 * this request with the last, then we are done.
-			 */
-			if (port != engine->execlist_port)
-				break;
-
-			/* If GVT overrides us we only ever submit port[0],
-			 * leaving port[1] empty. Note that we also have
-			 * to be careful that we don't queue the same
-			 * context (even though a different request) to
-			 * the second port.
+		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+		struct drm_i915_gem_request *rq, *rn;
+
+		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+			/*
+			 * Can we combine this request with the current port?
+			 * It has to be the same context/ringbuffer and not
+			 * have any exceptions (e.g. GVT saying never to
+			 * combine contexts).
+			 *
+			 * If we can combine the requests, we can execute both
+			 * by updating the RING_TAIL to point to the end of the
+			 * second request, and so we never need to tell the
+			 * hardware about the first.
 			 */
-			if (ctx_single_port_submission(last->ctx) ||
-			    ctx_single_port_submission(cursor->ctx))
-				break;
+			if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
+				/*
+				 * If we are on the second port and cannot
+				 * combine this request with the last, then we
+				 * are done.
+				 */
+				if (port != engine->execlist_port) {
+					__list_del_many(&p->requests,
+							&rq->priotree.link);
+					goto done;
+				}
+
+				/*
+				 * If GVT overrides us we only ever submit
+				 * port[0], leaving port[1] empty. Note that we
+				 * also have to be careful that we don't queue
+				 * the same context (even though a different
+				 * request) to the second port.
+				 */
+				if (ctx_single_port_submission(last->ctx) ||
+				    ctx_single_port_submission(rq->ctx)) {
+					__list_del_many(&p->requests,
+							&rq->priotree.link);
+					goto done;
+				}
+
+				GEM_BUG_ON(last->ctx == rq->ctx);
+
+				if (submit)
+					port_assign(port, last);
+				port++;
+			}
 
-			GEM_BUG_ON(last->ctx == cursor->ctx);
+			INIT_LIST_HEAD(&rq->priotree.link);
+			rq->priotree.priority = INT_MAX;
 
-			if (submit)
-				port_assign(port, last);
-			port++;
+			__i915_gem_request_submit(rq);
+			trace_i915_gem_request_in(rq,
+						  port - engine->execlist_port);
+			last = rq;
+			submit = true;
 		}
 
 		rb = rb_next(rb);
-		rb_erase(&cursor->priotree.node, &engine->execlist_queue);
-		RB_CLEAR_NODE(&cursor->priotree.node);
-		cursor->priotree.priority = INT_MAX;
-
-		__i915_gem_request_submit(cursor);
-		trace_i915_gem_request_in(cursor, port - engine->execlist_port);
-		last = cursor;
-		submit = true;
+		rb_erase(&p->node, &engine->execlist_queue);
+		INIT_LIST_HEAD(&p->requests);
+		if (p->priority != I915_PRIORITY_NORMAL)
+			kfree(p);
 	}
-	if (submit) {
+done:
+	engine->execlist_first = rb;
+	if (submit)
 		port_assign(port, last);
-		engine->execlist_first = rb;
-	}
 	spin_unlock_irq(&engine->timeline->lock);
 
 	if (submit)
@@ -610,28 +629,53 @@ static void intel_lrc_irq_handler(unsigned long data)
 	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
 }
 
-static bool insert_request(struct i915_priotree *pt, struct rb_root *root)
+static bool
+insert_request(struct intel_engine_cs *engine,
+	       struct i915_priotree *pt,
+	       int prio)
 {
-	struct rb_node **p, *rb;
+	struct i915_priolist *p;
+	struct rb_node **parent, *rb;
 	bool first = true;
 
+find_priolist:
 	/* most positive priority is scheduled first, equal priorities fifo */
 	rb = NULL;
-	p = &root->rb_node;
-	while (*p) {
-		struct i915_priotree *pos;
-
-		rb = *p;
-		pos = rb_entry(rb, typeof(*pos), node);
-		if (pt->priority > pos->priority) {
-			p = &rb->rb_left;
-		} else {
-			p = &rb->rb_right;
+	parent = &engine->execlist_queue.rb_node;
+	while (*parent) {
+		rb = *parent;
+		p = rb_entry(rb, typeof(*p), node);
+		if (prio > p->priority) {
+			parent = &rb->rb_left;
+		} else if (prio < p->priority) {
+			parent = &rb->rb_right;
 			first = false;
+		} else {
+			list_add_tail(&pt->link, &p->requests);
+			return false;
+		}
+	}
+
+	if (prio == I915_PRIORITY_NORMAL) {
+		p = &engine->default_priolist;
+	} else {
+		p = kmalloc(sizeof(*p), GFP_ATOMIC);
+		/* Convert an allocation failure to a priority bump */
+		if (unlikely(!p)) {
+			prio = I915_PRIORITY_NORMAL; /* recurses just once */
+			goto find_priolist;
 		}
 	}
-	rb_link_node(&pt->node, rb, p);
-	rb_insert_color(&pt->node, root);
+
+	p->priority = prio;
+	rb_link_node(&p->node, rb, parent);
+	rb_insert_color(&p->node, &engine->execlist_queue);
+
+	INIT_LIST_HEAD(&p->requests);
+	list_add_tail(&pt->link, &p->requests);
+
+	if (first)
+		engine->execlist_first = &p->node;
 
 	return first;
 }
@@ -644,12 +688,16 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
 	/* Will be called from irq-context when using foreign fences. */
 	spin_lock_irqsave(&engine->timeline->lock, flags);
 
-	if (insert_request(&request->priotree, &engine->execlist_queue)) {
-		engine->execlist_first = &request->priotree.node;
+	if (insert_request(engine,
+			   &request->priotree,
+			   request->priotree.priority)) {
 		if (execlists_elsp_ready(engine))
 			tasklet_hi_schedule(&engine->irq_tasklet);
 	}
 
+	GEM_BUG_ON(!engine->execlist_first);
+	GEM_BUG_ON(list_empty(&request->priotree.link));
+
 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }
 
@@ -734,10 +782,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 			continue;
 
 		pt->priority = prio;
-		if (!RB_EMPTY_NODE(&pt->node)) {
-			rb_erase(&pt->node, &engine->execlist_queue);
-			if (insert_request(pt, &engine->execlist_queue))
-				engine->execlist_first = &pt->node;
+		if (!list_empty(&pt->link)) {
+			__list_del_entry(&pt->link);
+			insert_request(engine, pt, prio);
 		}
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c380f3ac8a39..0ca6f5c3f9a7 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -177,6 +177,12 @@ enum intel_engine_id {
 	VECS
 };
 
+struct i915_priolist {
+	struct rb_node node;
+	struct list_head requests;
+	int priority;
+};
+
 #define INTEL_ENGINE_CS_MAX_NAME 8
 
 struct intel_engine_cs {
@@ -367,6 +373,7 @@ struct intel_engine_cs {
 
 	/* Execlists */
 	struct tasklet_struct irq_tasklet;
+	struct i915_priolist default_priolist;
 	struct execlist_port {
 		struct drm_i915_gem_request *request_count;
 #define EXECLIST_COUNT_BITS 2
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 09/12] drm/i915: Create a kmem_cache to allocate struct i915_priolist from
  2017-05-11 19:59 [PATCH 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (6 preceding siblings ...)
  2017-05-11 19:59 ` [PATCH 08/12] drm/i915: Split execlist priority queue into rbtree + linked list Chris Wilson
@ 2017-05-11 19:59 ` Chris Wilson
  2017-05-16  7:57   ` Tvrtko Ursulin
  2017-05-11 19:59 ` [PATCH 10/12] drm/i915/execlists: Reduce lock contention between schedule/submit_request Chris Wilson
                   ` (7 subsequent siblings)
  15 siblings, 1 reply; 36+ messages in thread
From: Chris Wilson @ 2017-05-11 19:59 UTC (permalink / raw)
  To: intel-gfx

The i915_priolist structs are allocated within an atomic context on a path
where we wish to minimise latency. If we use a dedicated kmem_cache, we have
the advantage of a local freelist from which to service new requests
that should keep the latency impact of an allocation small. Though
currently we expect the majority of requests to be at default priority
(and so hit the preallocated priolist), once userspace starts using
priorities they are likely to use many fine-grained policies, improving
the utilisation of a private slab.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            | 1 +
 drivers/gpu/drm/i915/i915_gem.c            | 9 ++++++++-
 drivers/gpu/drm/i915/i915_guc_submission.c | 2 +-
 drivers/gpu/drm/i915/intel_lrc.c           | 4 ++--
 4 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ff3574a56812..7f192efbe088 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2027,6 +2027,7 @@ struct drm_i915_private {
 	struct kmem_cache *vmas;
 	struct kmem_cache *requests;
 	struct kmem_cache *dependencies;
+	struct kmem_cache *priorities;
 
 	const struct intel_device_info info;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 75d7575b81f4..f52d72b1a5b4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4871,12 +4871,16 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 	if (!dev_priv->dependencies)
 		goto err_requests;
 
+	dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
+	if (!dev_priv->priorities)
+		goto err_dependencies;
+
 	mutex_lock(&dev_priv->drm.struct_mutex);
 	INIT_LIST_HEAD(&dev_priv->gt.timelines);
 	err = i915_gem_timeline_init__global(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	if (err)
-		goto err_dependencies;
+		goto err_priorities;
 
 	INIT_LIST_HEAD(&dev_priv->context_list);
 	INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
@@ -4900,6 +4904,8 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 
 	return 0;
 
+err_priorities:
+	kmem_cache_destroy(dev_priv->priorities);
 err_dependencies:
 	kmem_cache_destroy(dev_priv->dependencies);
 err_requests:
@@ -4923,6 +4929,7 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
 	WARN_ON(!list_empty(&dev_priv->gt.timelines));
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
+	kmem_cache_destroy(dev_priv->priorities);
 	kmem_cache_destroy(dev_priv->dependencies);
 	kmem_cache_destroy(dev_priv->requests);
 	kmem_cache_destroy(dev_priv->vmas);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 440bab856cc2..53c022c47687 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -704,7 +704,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 		rb_erase(&p->node, &engine->execlist_queue);
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
-			kfree(p);
+			kmem_cache_free(engine->i915->priorities, p);
 	}
 done:
 	engine->execlist_first = rb;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 849b35796de7..7e41529bd074 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -500,7 +500,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		rb_erase(&p->node, &engine->execlist_queue);
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
-			kfree(p);
+			kmem_cache_free(engine->i915->priorities, p);
 	}
 done:
 	engine->execlist_first = rb;
@@ -659,7 +659,7 @@ insert_request(struct intel_engine_cs *engine,
 	if (prio == I915_PRIORITY_NORMAL) {
 		p = &engine->default_priolist;
 	} else {
-		p = kmalloc(sizeof(*p), GFP_ATOMIC);
+		p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
 		/* Convert an allocation failure to a priority bump */
 		if (unlikely(!p)) {
 			prio = I915_PRIORITY_NORMAL; /* recurses just once */
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 10/12] drm/i915/execlists: Reduce lock contention between schedule/submit_request
  2017-05-11 19:59 [PATCH 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (7 preceding siblings ...)
  2017-05-11 19:59 ` [PATCH 09/12] drm/i915: Create a kmem_cache to allocate struct i915_priolist from Chris Wilson
@ 2017-05-11 19:59 ` Chris Wilson
  2017-05-15 10:51   ` Tvrtko Ursulin
  2017-05-11 19:59 ` [PATCH 11/12] drm/i915: Stop inlining the execlists IRQ handler Chris Wilson
                   ` (6 subsequent siblings)
  15 siblings, 1 reply; 36+ messages in thread
From: Chris Wilson @ 2017-05-11 19:59 UTC (permalink / raw)
  To: intel-gfx

If we do not need to perform priority bumping, and we haven't yet
submitted the request, we can update its priority in situ and skip
acquiring the engine locks -- thus avoiding any contention between us
and submit/execute.

v2: Remove the stack element from the list if we can do the early
assignment.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7e41529bd074..3cfb4b145b75 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -767,6 +767,19 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 		list_safe_reset_next(dep, p, dfs_link);
 	}
 
+	/* If we didn't need to bump any existing priorities, and we haven't
+	 * yet submitted this request (i.e. there is no potential race with
+	 * execlists_submit_request()), we can set our own priority and skip
+	 * acquiring the engine locks.
+	 */
+	if (request->priotree.priority == INT_MIN) {
+		GEM_BUG_ON(!list_empty(&request->priotree.link));
+		request->priotree.priority = prio;
+		if (stack.dfs_link.next == stack.dfs_link.prev)
+			return;
+		__list_del_entry(&stack.dfs_link);
+	}
+
 	engine = request->engine;
 	spin_lock_irq(&engine->timeline->lock);
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 11/12] drm/i915: Stop inlining the execlists IRQ handler
  2017-05-11 19:59 [PATCH 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (8 preceding siblings ...)
  2017-05-11 19:59 ` [PATCH 10/12] drm/i915/execlists: Reduce lock contention between schedule/submit_request Chris Wilson
@ 2017-05-11 19:59 ` Chris Wilson
  2017-05-11 19:59 ` [PATCH 12/12] drm/i915: Don't force serialisation on marking up execlists irq posted Chris Wilson
                   ` (5 subsequent siblings)
  15 siblings, 0 replies; 36+ messages in thread
From: Chris Wilson @ 2017-05-11 19:59 UTC (permalink / raw)
  To: intel-gfx

As the handler is now quite complex, involving a few atomics, the cost
of the function preamble is negligible in comparison and so we should
leave the function out-of-line for better I$.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 927b408e6e95..a58152dd7021 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1317,7 +1317,7 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
 		ivybridge_parity_error_irq_handler(dev_priv, gt_iir);
 }
 
-static __always_inline void
+static void
 gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
 {
 	bool tasklet = false;
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 12/12] drm/i915: Don't force serialisation on marking up execlists irq posted
  2017-05-11 19:59 [PATCH 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (9 preceding siblings ...)
  2017-05-11 19:59 ` [PATCH 11/12] drm/i915: Stop inlining the execlists IRQ handler Chris Wilson
@ 2017-05-11 19:59 ` Chris Wilson
  2017-05-12  7:16   ` Mika Kuoppala
  2017-05-15 10:33   ` Tvrtko Ursulin
  2017-05-11 20:17 ` ✓ Fi.CI.BAT: success for series starting with [01/12] drm/i915: Remove kref from i915_sw_fence Patchwork
                   ` (4 subsequent siblings)
  15 siblings, 2 replies; 36+ messages in thread
From: Chris Wilson @ 2017-05-11 19:59 UTC (permalink / raw)
  To: intel-gfx

Since we coordinate with the execlists tasklet using a locked schedule
operation that ensures that after we set the engine->irq_posted we
always have an invocation of the tasklet, we do not need to use a locked
operation to set the engine->irq_posted itself.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index a58152dd7021..7b7f55a28eec 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1324,7 +1324,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
 
 	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
 		if (port_count(&engine->execlist_port[0])) {
-			set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+			__set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 			tasklet = true;
 		}
 	}
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [01/12] drm/i915: Remove kref from i915_sw_fence
  2017-05-11 19:59 [PATCH 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (10 preceding siblings ...)
  2017-05-11 19:59 ` [PATCH 12/12] drm/i915: Don't force serialisation on marking up execlists irq posted Chris Wilson
@ 2017-05-11 20:17 ` Patchwork
  2017-05-12  7:43 ` [PATCH 01/12] " Mika Kuoppala
                   ` (3 subsequent siblings)
  15 siblings, 0 replies; 36+ messages in thread
From: Patchwork @ 2017-05-11 20:17 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/12] drm/i915: Remove kref from i915_sw_fence
URL   : https://patchwork.freedesktop.org/series/24314/
State : success

== Summary ==

Series 24314v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/24314/revisions/1/mbox/

Test gem_exec_suspend:
        Subgroup basic-s4-devices:
                pass       -> DMESG-WARN (fi-kbl-7560u) fdo#100125

fdo#100125 https://bugs.freedesktop.org/show_bug.cgi?id=100125

fi-bdw-5557u     total:278  pass:267  dwarn:0   dfail:0   fail:0   skip:11  time:437s
fi-bdw-gvtdvm    total:278  pass:256  dwarn:8   dfail:0   fail:0   skip:14  time:432s
fi-bsw-n3050     total:278  pass:242  dwarn:0   dfail:0   fail:0   skip:36  time:573s
fi-bxt-j4205     total:278  pass:259  dwarn:0   dfail:0   fail:0   skip:19  time:520s
fi-bxt-t5700     total:278  pass:258  dwarn:0   dfail:0   fail:0   skip:20  time:551s
fi-byt-j1900     total:278  pass:254  dwarn:0   dfail:0   fail:0   skip:24  time:492s
fi-byt-n2820     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time:484s
fi-hsw-4770      total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time:411s
fi-hsw-4770r     total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time:408s
fi-ilk-650       total:278  pass:228  dwarn:0   dfail:0   fail:0   skip:50  time:423s
fi-ivb-3520m     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time:511s
fi-ivb-3770      total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time:478s
fi-kbl-7500u     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time:463s
fi-kbl-7560u     total:278  pass:267  dwarn:1   dfail:0   fail:0   skip:10  time:566s
fi-skl-6260u     total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time:462s
fi-skl-6700hq    total:278  pass:261  dwarn:0   dfail:0   fail:0   skip:17  time:587s
fi-skl-6700k     total:278  pass:256  dwarn:4   dfail:0   fail:0   skip:18  time:470s
fi-skl-6770hq    total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time:501s
fi-skl-gvtdvm    total:278  pass:265  dwarn:0   dfail:0   fail:0   skip:13  time:436s
fi-snb-2520m     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time:542s
fi-snb-2600      total:278  pass:249  dwarn:0   dfail:0   fail:0   skip:29  time:404s

51350ea90d1a5c8bb7e6532eb61c64a57e83ab9b drm-tip: 2017y-05m-11d-13h-05m-06s UTC integration manifest
4d4ebd6 drm/i915: Don't force serialisation on marking up execlists irq posted
353d685 drm/i915: Stop inlining the execlists IRQ handler
9232c0b drm/i915/execlists: Reduce lock contention between schedule/submit_request
a6ab30e drm/i915: Create a kmem_cache to allocate struct i915_priolist from
09a6b9c drm/i915: Split execlist priority queue into rbtree + linked list
13ceaca drm/i915: Use a define for the default priority [0]
7385461 drm/i915: Don't mark an execlists context-switch when idle
8041c0a drm/i915/execlists: Pack the count into the low bits of the port.request
63ef1b6 drm/i915: Redefine ptr_pack_bits() and friends
3d633ac drm/i915: Make ptr_unpack_bits() more function-like
b9faf5c drm/i915: Import the kfence selftests for i915_sw_fence
34f9729 drm/i915: Remove kref from i915_sw_fence

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_4677/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 12/12] drm/i915: Don't force serialisation on marking up execlists irq posted
  2017-05-11 19:59 ` [PATCH 12/12] drm/i915: Don't force serialisation on marking up execlists irq posted Chris Wilson
@ 2017-05-12  7:16   ` Mika Kuoppala
  2017-05-15 10:33   ` Tvrtko Ursulin
  1 sibling, 0 replies; 36+ messages in thread
From: Mika Kuoppala @ 2017-05-12  7:16 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Since we coordinate with the execlists tasklet using a locked schedule
> operation that ensures that after we set the engine->irq_posted we
> always have an invocation of the tasklet, we do not need to use a locked
> operation to set the engine->irq_posted itself.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

This has r-b from Tvrtko.
-Mika

> ---
>  drivers/gpu/drm/i915/i915_irq.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index a58152dd7021..7b7f55a28eec 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1324,7 +1324,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
>  
>  	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
>  		if (port_count(&engine->execlist_port[0])) {
> -			set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> +			__set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>  			tasklet = true;
>  		}
>  	}
> -- 
> 2.11.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 01/12] drm/i915: Remove kref from i915_sw_fence
  2017-05-11 19:59 [PATCH 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (11 preceding siblings ...)
  2017-05-11 20:17 ` ✓ Fi.CI.BAT: success for series starting with [01/12] drm/i915: Remove kref from i915_sw_fence Patchwork
@ 2017-05-12  7:43 ` Mika Kuoppala
  2017-05-12  8:19   ` Chris Wilson
  2017-05-12 12:13 ` Mika Kuoppala
                   ` (2 subsequent siblings)
  15 siblings, 1 reply; 36+ messages in thread
From: Mika Kuoppala @ 2017-05-12  7:43 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> My original intention was for i915_sw_fence to be the base class and
> provide the reference count for the container. This was from starting
> with a design to handle async_work. In practice, for i915 we embed
> fences into structs which have their own independent reference counting,
> making the i915_sw_fence.kref duplicitous. If we remove the kref, we
> remove the i915_sw_fence's ability to free itself and its independence,
> it can only exist within a container and must be supplied with a
> callback to handle its release.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_sw_fence.c | 55 ++++++++----------------------------
>  drivers/gpu/drm/i915/i915_sw_fence.h |  1 -
>  2 files changed, 11 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
> index a277f8eb7beb..a0a690d6627e 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence.c
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.c
> @@ -120,34 +120,6 @@ void i915_sw_fence_fini(struct i915_sw_fence *fence)
>  }
>  #endif
>  
> -static void i915_sw_fence_release(struct kref *kref)
> -{
> -	struct i915_sw_fence *fence = container_of(kref, typeof(*fence), kref);
> -
> -	WARN_ON(atomic_read(&fence->pending) > 0);
> -	debug_fence_destroy(fence);
> -
> -	if (fence->flags & I915_SW_FENCE_MASK) {
> -		__i915_sw_fence_notify(fence, FENCE_FREE);

Is the current design such that the callback equals embedding?

-Mika

> -	} else {
> -		i915_sw_fence_fini(fence);
> -		kfree(fence);
> -	}
> -}
> -
> -static void i915_sw_fence_put(struct i915_sw_fence *fence)
> -{
> -	debug_fence_assert(fence);
> -	kref_put(&fence->kref, i915_sw_fence_release);
> -}
> -
> -static struct i915_sw_fence *i915_sw_fence_get(struct i915_sw_fence *fence)
> -{
> -	debug_fence_assert(fence);
> -	kref_get(&fence->kref);
> -	return fence;
> -}
> -
>  static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
>  					struct list_head *continuation)
>  {
> @@ -202,13 +174,15 @@ static void __i915_sw_fence_complete(struct i915_sw_fence *fence,
>  
>  	debug_fence_set_state(fence, DEBUG_FENCE_IDLE, DEBUG_FENCE_NOTIFY);
>  
> -	if (fence->flags & I915_SW_FENCE_MASK &&
> -	    __i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE)
> +	if (__i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE)
>  		return;
>  
>  	debug_fence_set_state(fence, DEBUG_FENCE_NOTIFY, DEBUG_FENCE_IDLE);
>  
>  	__i915_sw_fence_wake_up_all(fence, continuation);
> +
> +	debug_fence_destroy(fence);
> +	__i915_sw_fence_notify(fence, FENCE_FREE);
>  }
>  
>  static void i915_sw_fence_complete(struct i915_sw_fence *fence)
> @@ -232,33 +206,26 @@ void __i915_sw_fence_init(struct i915_sw_fence *fence,
>  			  const char *name,
>  			  struct lock_class_key *key)
>  {
> -	BUG_ON((unsigned long)fn & ~I915_SW_FENCE_MASK);
> +	BUG_ON(!fn || (unsigned long)fn & ~I915_SW_FENCE_MASK);
>  
>  	debug_fence_init(fence);
>  
>  	__init_waitqueue_head(&fence->wait, name, key);
> -	kref_init(&fence->kref);
>  	atomic_set(&fence->pending, 1);
>  	fence->flags = (unsigned long)fn;
>  }
>  
> -static void __i915_sw_fence_commit(struct i915_sw_fence *fence)
> -{
> -	i915_sw_fence_complete(fence);
> -	i915_sw_fence_put(fence);
> -}
> -
>  void i915_sw_fence_commit(struct i915_sw_fence *fence)
>  {
>  	debug_fence_activate(fence);
> -	__i915_sw_fence_commit(fence);
> +	i915_sw_fence_complete(fence);
>  }
>  
>  static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key)
>  {
>  	list_del(&wq->task_list);
>  	__i915_sw_fence_complete(wq->private, key);
> -	i915_sw_fence_put(wq->private);
> +
>  	if (wq->flags & I915_SW_FENCE_FLAG_ALLOC)
>  		kfree(wq);
>  	return 0;
> @@ -353,7 +320,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
>  	INIT_LIST_HEAD(&wq->task_list);
>  	wq->flags = pending;
>  	wq->func = i915_sw_fence_wake;
> -	wq->private = i915_sw_fence_get(fence);
> +	wq->private = fence;
>  
>  	i915_sw_fence_await(fence);
>  
> @@ -402,7 +369,7 @@ static void timer_i915_sw_fence_wake(unsigned long data)
>  	dma_fence_put(cb->dma);
>  	cb->dma = NULL;
>  
> -	__i915_sw_fence_commit(cb->fence);
> +	i915_sw_fence_complete(cb->fence);
>  	cb->timer.function = NULL;
>  }
>  
> @@ -413,7 +380,7 @@ static void dma_i915_sw_fence_wake(struct dma_fence *dma,
>  
>  	del_timer_sync(&cb->timer);
>  	if (cb->timer.function)
> -		__i915_sw_fence_commit(cb->fence);
> +		i915_sw_fence_complete(cb->fence);
>  	dma_fence_put(cb->dma);
>  
>  	kfree(cb);
> @@ -440,7 +407,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
>  		return dma_fence_wait(dma, false);
>  	}
>  
> -	cb->fence = i915_sw_fence_get(fence);
> +	cb->fence = fence;
>  	i915_sw_fence_await(fence);
>  
>  	cb->dma = NULL;
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
> index d31cefbbcc04..1d3b6051daaf 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence.h
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.h
> @@ -23,7 +23,6 @@ struct reservation_object;
>  struct i915_sw_fence {
>  	wait_queue_head_t wait;
>  	unsigned long flags;
> -	struct kref kref;
>  	atomic_t pending;
>  };
>  
> -- 
> 2.11.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 07/12] drm/i915: Use a define for the default priority [0]
  2017-05-11 19:59 ` [PATCH 07/12] drm/i915: Use a define for the default priority [0] Chris Wilson
@ 2017-05-12  8:02   ` Mika Kuoppala
  0 siblings, 0 replies; 36+ messages in thread
From: Mika Kuoppala @ 2017-05-12  8:02 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Explicitly assign the default priority, and give it a name. After much
> discussion, we have chosen to call it I915_PRIORITY_NORMAL!
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> ---
>  drivers/gpu/drm/i915/i915_gem_context.c | 1 +
>  drivers/gpu/drm/i915/i915_gem_request.h | 1 +
>  2 files changed, 2 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 31a73c39239f..c5d1666d7071 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -199,6 +199,7 @@ __create_hw_context(struct drm_i915_private *dev_priv,
>  	kref_init(&ctx->ref);
>  	list_add_tail(&ctx->link, &dev_priv->context_list);
>  	ctx->i915 = dev_priv;
> +	ctx->priority = I915_PRIORITY_NORMAL;
>  
>  	/* Default context will never have a file_priv */
>  	ret = DEFAULT_CONTEXT_HANDLE;
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 4ccab5affd3c..0ecfc5e2d707 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -70,6 +70,7 @@ struct i915_priotree {
>  	struct rb_node node;
>  	int priority;
>  #define I915_PRIORITY_MAX 1024
> +#define I915_PRIORITY_NORMAL 0
>  #define I915_PRIORITY_MIN (-I915_PRIORITY_MAX)
>  };
>  
> -- 
> 2.11.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 01/12] drm/i915: Remove kref from i915_sw_fence
  2017-05-12  7:43 ` [PATCH 01/12] " Mika Kuoppala
@ 2017-05-12  8:19   ` Chris Wilson
  0 siblings, 0 replies; 36+ messages in thread
From: Chris Wilson @ 2017-05-12  8:19 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

On Fri, May 12, 2017 at 10:43:31AM +0300, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > My original intention was for i915_sw_fence to be the base class and
> > provide the reference count for the container. This was from starting
> > with a design to handle async_work. In practice, for i915 we embed
> > fences into structs which have their own independent reference counting,
> > making the i915_sw_fence.kref duplicitous. If we remove the kref, we
> > remove the i915_sw_fence's ability to free itself and its independence,
> > it can only exist within a container and must be supplied with a
> > callback to handle its release.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_sw_fence.c | 55 ++++++++----------------------------
> >  drivers/gpu/drm/i915/i915_sw_fence.h |  1 -
> >  2 files changed, 11 insertions(+), 45 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
> > index a277f8eb7beb..a0a690d6627e 100644
> > --- a/drivers/gpu/drm/i915/i915_sw_fence.c
> > +++ b/drivers/gpu/drm/i915/i915_sw_fence.c
> > @@ -120,34 +120,6 @@ void i915_sw_fence_fini(struct i915_sw_fence *fence)
> >  }
> >  #endif
> >  
> > -static void i915_sw_fence_release(struct kref *kref)
> > -{
> > -	struct i915_sw_fence *fence = container_of(kref, typeof(*fence), kref);
> > -
> > -	WARN_ON(atomic_read(&fence->pending) > 0);
> > -	debug_fence_destroy(fence);
> > -
> > -	if (fence->flags & I915_SW_FENCE_MASK) {
> > -		__i915_sw_fence_notify(fence, FENCE_FREE);
> 
> Is the current design so that the callback equals embeddding?

To embed, in 99.999% of cases you really do have to employ the callback
to resolve lifetime. Yes, it is possible by explicitly calling wait
that you do know its state; and that is useful for on-stack fences.
However, the current usage of debugobjects neglects that (on-stack
objects are banned), so we do need a constructor for
i915_sw_fence_init_onstack(), so mandating a callback simplifies
everyone's lives.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 01/12] drm/i915: Remove kref from i915_sw_fence
  2017-05-11 19:59 [PATCH 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (12 preceding siblings ...)
  2017-05-12  7:43 ` [PATCH 01/12] " Mika Kuoppala
@ 2017-05-12 12:13 ` Mika Kuoppala
  2017-05-15 11:47 ` ✓ Fi.CI.BAT: success for series starting with [01/12] drm/i915: Remove kref from i915_sw_fence (rev2) Patchwork
  2017-05-15 14:01 ` ✓ Fi.CI.BAT: success for series starting with [01/12] drm/i915: Remove kref from i915_sw_fence (rev4) Patchwork
  15 siblings, 0 replies; 36+ messages in thread
From: Mika Kuoppala @ 2017-05-12 12:13 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> My original intention was for i915_sw_fence to be the base class and
> provide the reference count for the container. This was from starting
> with a design to handle async_work. In practice, for i915 we embed
> fences into structs which have their own independent reference counting,
> making the i915_sw_fence.kref duplicitous. If we remove the kref, we
> remove the i915_sw_fence's ability to free itself and its independence,
> it can only exist within a container and must be supplied with a
> callback to handle its release.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> ---
>  drivers/gpu/drm/i915/i915_sw_fence.c | 55 ++++++++----------------------------
>  drivers/gpu/drm/i915/i915_sw_fence.h |  1 -
>  2 files changed, 11 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
> index a277f8eb7beb..a0a690d6627e 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence.c
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.c
> @@ -120,34 +120,6 @@ void i915_sw_fence_fini(struct i915_sw_fence *fence)
>  }
>  #endif
>  
> -static void i915_sw_fence_release(struct kref *kref)
> -{
> -	struct i915_sw_fence *fence = container_of(kref, typeof(*fence), kref);
> -
> -	WARN_ON(atomic_read(&fence->pending) > 0);
> -	debug_fence_destroy(fence);
> -
> -	if (fence->flags & I915_SW_FENCE_MASK) {
> -		__i915_sw_fence_notify(fence, FENCE_FREE);
> -	} else {
> -		i915_sw_fence_fini(fence);
> -		kfree(fence);
> -	}
> -}
> -
> -static void i915_sw_fence_put(struct i915_sw_fence *fence)
> -{
> -	debug_fence_assert(fence);
> -	kref_put(&fence->kref, i915_sw_fence_release);
> -}
> -
> -static struct i915_sw_fence *i915_sw_fence_get(struct i915_sw_fence *fence)
> -{
> -	debug_fence_assert(fence);
> -	kref_get(&fence->kref);
> -	return fence;
> -}
> -
>  static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
>  					struct list_head *continuation)
>  {
> @@ -202,13 +174,15 @@ static void __i915_sw_fence_complete(struct i915_sw_fence *fence,
>  
>  	debug_fence_set_state(fence, DEBUG_FENCE_IDLE, DEBUG_FENCE_NOTIFY);
>  
> -	if (fence->flags & I915_SW_FENCE_MASK &&
> -	    __i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE)
> +	if (__i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE)
>  		return;
>  
>  	debug_fence_set_state(fence, DEBUG_FENCE_NOTIFY, DEBUG_FENCE_IDLE);
>  
>  	__i915_sw_fence_wake_up_all(fence, continuation);
> +
> +	debug_fence_destroy(fence);
> +	__i915_sw_fence_notify(fence, FENCE_FREE);
>  }
>  
>  static void i915_sw_fence_complete(struct i915_sw_fence *fence)
> @@ -232,33 +206,26 @@ void __i915_sw_fence_init(struct i915_sw_fence *fence,
>  			  const char *name,
>  			  struct lock_class_key *key)
>  {
> -	BUG_ON((unsigned long)fn & ~I915_SW_FENCE_MASK);
> +	BUG_ON(!fn || (unsigned long)fn & ~I915_SW_FENCE_MASK);
>  
>  	debug_fence_init(fence);
>  
>  	__init_waitqueue_head(&fence->wait, name, key);
> -	kref_init(&fence->kref);
>  	atomic_set(&fence->pending, 1);
>  	fence->flags = (unsigned long)fn;
>  }
>  
> -static void __i915_sw_fence_commit(struct i915_sw_fence *fence)
> -{
> -	i915_sw_fence_complete(fence);
> -	i915_sw_fence_put(fence);
> -}
> -
>  void i915_sw_fence_commit(struct i915_sw_fence *fence)
>  {
>  	debug_fence_activate(fence);
> -	__i915_sw_fence_commit(fence);
> +	i915_sw_fence_complete(fence);
>  }
>  
>  static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key)
>  {
>  	list_del(&wq->task_list);
>  	__i915_sw_fence_complete(wq->private, key);
> -	i915_sw_fence_put(wq->private);
> +
>  	if (wq->flags & I915_SW_FENCE_FLAG_ALLOC)
>  		kfree(wq);
>  	return 0;
> @@ -353,7 +320,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
>  	INIT_LIST_HEAD(&wq->task_list);
>  	wq->flags = pending;
>  	wq->func = i915_sw_fence_wake;
> -	wq->private = i915_sw_fence_get(fence);
> +	wq->private = fence;
>  
>  	i915_sw_fence_await(fence);
>  
> @@ -402,7 +369,7 @@ static void timer_i915_sw_fence_wake(unsigned long data)
>  	dma_fence_put(cb->dma);
>  	cb->dma = NULL;
>  
> -	__i915_sw_fence_commit(cb->fence);
> +	i915_sw_fence_complete(cb->fence);
>  	cb->timer.function = NULL;
>  }
>  
> @@ -413,7 +380,7 @@ static void dma_i915_sw_fence_wake(struct dma_fence *dma,
>  
>  	del_timer_sync(&cb->timer);
>  	if (cb->timer.function)
> -		__i915_sw_fence_commit(cb->fence);
> +		i915_sw_fence_complete(cb->fence);
>  	dma_fence_put(cb->dma);
>  
>  	kfree(cb);
> @@ -440,7 +407,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
>  		return dma_fence_wait(dma, false);
>  	}
>  
> -	cb->fence = i915_sw_fence_get(fence);
> +	cb->fence = fence;
>  	i915_sw_fence_await(fence);
>  
>  	cb->dma = NULL;
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
> index d31cefbbcc04..1d3b6051daaf 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence.h
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.h
> @@ -23,7 +23,6 @@ struct reservation_object;
>  struct i915_sw_fence {
>  	wait_queue_head_t wait;
>  	unsigned long flags;
> -	struct kref kref;
>  	atomic_t pending;
>  };
>  
> -- 
> 2.11.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 12/12] drm/i915: Don't force serialisation on marking up execlists irq posted
  2017-05-11 19:59 ` [PATCH 12/12] drm/i915: Don't force serialisation on marking up execlists irq posted Chris Wilson
  2017-05-12  7:16   ` Mika Kuoppala
@ 2017-05-15 10:33   ` Tvrtko Ursulin
  1 sibling, 0 replies; 36+ messages in thread
From: Tvrtko Ursulin @ 2017-05-15 10:33 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 11/05/2017 20:59, Chris Wilson wrote:
> Since we coordinate with the execlists tasklet using a locked schedule
> operation that ensures that after we set the engine->irq_posted we
> always have an invocation of the tasklet, we do not need to use a locked
> operation to set the engine->irq_posted itself.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_irq.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index a58152dd7021..7b7f55a28eec 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1324,7 +1324,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
>
>  	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
>  		if (port_count(&engine->execlist_port[0])) {
> -			set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> +			__set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>  			tasklet = true;
>  		}
>  	}
>

As Mika said - once more:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

:)

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 10/12] drm/i915/execlists: Reduce lock contention between schedule/submit_request
  2017-05-11 19:59 ` [PATCH 10/12] drm/i915/execlists: Reduce lock contention between schedule/submit_request Chris Wilson
@ 2017-05-15 10:51   ` Tvrtko Ursulin
  2017-05-15 10:55     ` Chris Wilson
  0 siblings, 1 reply; 36+ messages in thread
From: Tvrtko Ursulin @ 2017-05-15 10:51 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 11/05/2017 20:59, Chris Wilson wrote:
> If we do not require to perform priority bumping, and we haven't yet
> submitted the request, we can update its priority in situ and skip
> acquiring the engine locks -- thus avoiding any contention between us
> and submit/execute.
>
> v2: Remove the stack element from the list if we can do the early
> assignment.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 13 +++++++++++++
>  1 file changed, 13 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 7e41529bd074..3cfb4b145b75 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -767,6 +767,19 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
>  		list_safe_reset_next(dep, p, dfs_link);
>  	}
>
> +	/* If we didn't need to bump any existing priorities, and we haven't
> +	 * yet submitted this request (i.e. there is no potential race with
> +	 * execlists_submit_request()), we can set our own priority and skip
> +	 * acquiring the engine locks.
> +	 */
> +	if (request->priotree.priority == INT_MIN) {
> +		GEM_BUG_ON(!list_empty(&request->priotree.link));
> +		request->priotree.priority = prio;
> +		if (stack.dfs_link.next == stack.dfs_link.prev)
> +			return;
> +		__list_del_entry(&stack.dfs_link);

Hm is early processing the out of queue request with no dependencies 
safe? Recently you fixed a race in this area, which AFAIK was about 
doing that outside the engine lock and then request becoming ready in 
parallel, racing with the FIFO order.

Regards,

Tvrtko

> +	}
> +
>  	engine = request->engine;
>  	spin_lock_irq(&engine->timeline->lock);
>
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH v2] drm/i915: Import the kfence selftests for i915_sw_fence
  2017-05-11 19:59 ` [PATCH 02/12] drm/i915: Import the kfence selftests for i915_sw_fence Chris Wilson
@ 2017-05-15 10:52   ` Chris Wilson
  2017-05-16  8:55     ` Chris Wilson
  2017-05-16 14:49     ` Mika Kuoppala
  0 siblings, 2 replies; 36+ messages in thread
From: Chris Wilson @ 2017-05-15 10:52 UTC (permalink / raw)
  To: intel-gfx

A long time ago, I wrote some selftests for the struct kfence idea. Now
that we have infrastructure in i915/igt for running kselftests, include
some for i915_sw_fence.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Kconfig.debug                 |  12 +
 drivers/gpu/drm/i915/i915_sw_fence.c               |   7 +-
 .../gpu/drm/i915/selftests/i915_mock_selftests.h   |   1 +
 drivers/gpu/drm/i915/selftests/i915_sw_fence.c     | 576 +++++++++++++++++++++
 4 files changed, 595 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/selftests/i915_sw_fence.c

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index b00edd3b8800..78c5c049a347 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -61,6 +61,18 @@ config DRM_I915_SW_FENCE_DEBUG_OBJECTS
 
           If in doubt, say "N".
 
+config DRM_I915_SW_FENCE_CHECK_DAG
+        bool "Enable additional driver debugging for detecting dependency cycles"
+        depends on DRM_I915
+        default n
+        help
+          Choose this option to turn on extra driver debugging that may affect
+          performance but will catch some internal issues.
+
+          Recommended for driver developers only.
+
+          If in doubt, say "N".
+
 config DRM_I915_SELFTEST
 	bool "Enable selftests upon driver load"
 	depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index a0a690d6627e..474d23c0c0ce 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -12,6 +12,7 @@
 #include <linux/reservation.h>
 
 #include "i915_sw_fence.h"
+#include "i915_selftest.h"
 
 #define I915_SW_FENCE_FLAG_ALLOC BIT(3) /* after WQ_FLAG_* for safety */
 
@@ -274,7 +275,7 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
 	unsigned long flags;
 	bool err;
 
-	if (!IS_ENABLED(CONFIG_I915_SW_FENCE_CHECK_DAG))
+	if (!IS_ENABLED(CONFIG_DRM_I915_SW_FENCE_CHECK_DAG))
 		return false;
 
 	spin_lock_irqsave(&i915_sw_fence_lock, flags);
@@ -490,3 +491,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 
 	return ret;
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_sw_fence.c"
+#endif
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index 76c1f149a0a0..fc74687501ba 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -9,6 +9,7 @@
  * Tests are executed in order by igt/drv_selftest
  */
 selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */
+selftest(fence, i915_sw_fence_mock_selftests)
 selftest(scatterlist, scatterlist_mock_selftests)
 selftest(syncmap, i915_syncmap_mock_selftests)
 selftest(uncore, intel_uncore_mock_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
new file mode 100644
index 000000000000..c552c23eecff
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
@@ -0,0 +1,576 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/completion.h>
+#include <linux/delay.h>
+
+#include "../i915_selftest.h"
+
+/*
+ * Notify callback installed on every fence created by alloc_fence().
+ *
+ * It takes no action for either notification state and always returns
+ * NOTIFY_DONE; in particular FENCE_FREE does not release the fence, so
+ * the test that allocated it remains responsible for calling free_fence().
+ */
+static int __i915_sw_fence_call
+fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+	switch (state) {
+	case FENCE_COMPLETE:
+		break;
+
+	case FENCE_FREE:
+		/* Leave the fence for the caller to free it after testing */
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+/*
+ * Allocate a fence with kmalloc() and initialise it with fence_notify as
+ * its callback.
+ *
+ * Returns the new fence, or NULL if the allocation failed. The caller
+ * releases it with free_fence().
+ */
+static struct i915_sw_fence *alloc_fence(void)
+{
+	struct i915_sw_fence *fence;
+
+	fence = kmalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	i915_sw_fence_init(fence, fence_notify);
+	return fence;
+}
+
+/* Finalise and kfree() a fence previously obtained from alloc_fence(). */
+static void free_fence(struct i915_sw_fence *fence)
+{
+	i915_sw_fence_fini(fence);
+	kfree(fence);
+}
+
+/*
+ * Check the reported state of a single, dependency-free fence: it must
+ * not be done before it is committed, and must be done both right after
+ * i915_sw_fence_commit() and after i915_sw_fence_wait() returns.
+ *
+ * Returns 0 if every check passes, -EINVAL otherwise.
+ */
+static int __test_self(struct i915_sw_fence *fence)
+{
+	if (i915_sw_fence_done(fence))
+		return -EINVAL;
+
+	i915_sw_fence_commit(fence);
+	if (!i915_sw_fence_done(fence))
+		return -EINVAL;
+
+	i915_sw_fence_wait(fence);
+	if (!i915_sw_fence_done(fence))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Subtest: run __test_self() on a freshly allocated fence and free it.
+ *
+ * @arg is unused. Returns -ENOMEM if the fence cannot be allocated,
+ * otherwise the result of __test_self().
+ */
+static int test_self(void *arg)
+{
+	struct i915_sw_fence *fence;
+	int ret;
+
+	/* Test i915_sw_fence signaling and completion testing */
+	fence = alloc_fence();
+	if (!fence)
+		return -ENOMEM;
+
+	ret = __test_self(fence);
+
+	free_fence(fence);
+	return ret;
+}
+
+/*
+ * Subtest: verify cycle detection in the fence dependency graph.
+ *
+ * Awaits that would close a loop must be rejected with -EINVAL, while
+ * awaits that keep the graph acyclic must not be. The test is skipped
+ * (returns 0) unless CONFIG_DRM_I915_SW_FENCE_CHECK_DAG is enabled.
+ *
+ * @arg is unused. Returns 0 on success, -ENOMEM if a fence cannot be
+ * allocated, or -EINVAL if any check fails.
+ */
+static int test_dag(void *arg)
+{
+	struct i915_sw_fence *A, *B, *C;
+	int ret = -EINVAL;
+
+	/* Test detection of cycles within the i915_sw_fence graphs */
+	if (!IS_ENABLED(CONFIG_DRM_I915_SW_FENCE_CHECK_DAG))
+		return 0;
+
+	A = alloc_fence();
+	if (!A)
+		return -ENOMEM;
+
+	if (i915_sw_fence_await_sw_fence_gfp(A, A, GFP_KERNEL) != -EINVAL) {
+		pr_err("recursive cycle not detected (AA)\n");
+		goto err_A;
+	}
+
+	B = alloc_fence();
+	if (!B) {
+		ret = -ENOMEM;
+		goto err_A;
+	}
+
+	i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL);
+	if (i915_sw_fence_await_sw_fence_gfp(B, A, GFP_KERNEL) != -EINVAL) {
+		pr_err("single depth cycle not detected (BAB)\n");
+		goto err_B;
+	}
+
+	C = alloc_fence();
+	if (!C) {
+		/* Do not hand a NULL fence to the awaits or to free_fence() */
+		ret = -ENOMEM;
+		goto err_B;
+	}
+
+	if (i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL) == -EINVAL) {
+		pr_err("invalid cycle detected\n");
+		goto err_C;
+	}
+	if (i915_sw_fence_await_sw_fence_gfp(C, B, GFP_KERNEL) != -EINVAL) {
+		pr_err("single depth cycle not detected (CBC)\n");
+		goto err_C;
+	}
+	if (i915_sw_fence_await_sw_fence_gfp(C, A, GFP_KERNEL) != -EINVAL) {
+		pr_err("cycle not detected (BA, CB, AC)\n");
+		goto err_C;
+	}
+	if (i915_sw_fence_await_sw_fence_gfp(A, C, GFP_KERNEL) == -EINVAL) {
+		pr_err("invalid cycle detected\n");
+		goto err_C;
+	}
+
+	i915_sw_fence_commit(A);
+	i915_sw_fence_commit(B);
+	i915_sw_fence_commit(C);
+
+	/* With every fence committed, the whole graph must have completed */
+	ret = 0;
+	if (!i915_sw_fence_done(C)) {
+		pr_err("fence C not done\n");
+		ret = -EINVAL;
+	}
+	if (!i915_sw_fence_done(B)) {
+		pr_err("fence B not done\n");
+		ret = -EINVAL;
+	}
+	if (!i915_sw_fence_done(A)) {
+		pr_err("fence A not done\n");
+		ret = -EINVAL;
+	}
+err_C:
+	free_fence(C);
+err_B:
+	free_fence(B);
+err_A:
+	free_fence(A);
+	return ret;
+}
+
+/*
+ * Subtest: fence A waits on a single event source B.
+ *
+ * A must not be reported done after its own commit while B is still
+ * pending, and both must be done once B has been committed.
+ *
+ * @arg is unused. Returns 0 on success, -ENOMEM if a fence cannot be
+ * allocated, a negative error from the await, or -EINVAL if a check fails.
+ */
+static int test_AB(void *arg)
+{
+	struct i915_sw_fence *A, *B;
+	int ret;
+
+	/* Test i915_sw_fence (A) waiting on an event source (B) */
+	A = alloc_fence();
+	if (!A)
+		return -ENOMEM;
+	B = alloc_fence();
+	if (!B) {
+		ret = -ENOMEM;
+		goto err_A;
+	}
+
+	ret = i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL);
+	if (ret < 0)
+		goto err_B;
+	if (ret == 0) {
+		/* B is the awaited fence here, matching the wording in test_ABC */
+		pr_err("Incorrectly reported fence B was complete before await\n");
+		ret = -EINVAL;
+		goto err_B;
+	}
+
+	ret = -EINVAL;
+	i915_sw_fence_commit(A);
+	if (i915_sw_fence_done(A)) {
+		pr_err("Fence A completed early\n");
+		goto err_B;
+	}
+
+	i915_sw_fence_commit(B);
+	if (!i915_sw_fence_done(B)) {
+		pr_err("Fence B is not done\n");
+		goto err_B;
+	}
+
+	if (!i915_sw_fence_done(A)) {
+		pr_err("Fence A is not done\n");
+		goto err_B;
+	}
+
+	ret = 0;
+err_B:
+	free_fence(B);
+err_A:
+	free_fence(A);
+	return ret;
+}
+
+/*
+ * Subtest: a chain of three fences, A waits on B which waits on C.
+ *
+ * Neither A nor B may be reported done after their own commits while C
+ * is still pending; committing C must then complete all three.
+ *
+ * @arg is unused. Returns 0 on success, -ENOMEM if a fence cannot be
+ * allocated, a negative error from an await, or -EINVAL if a check fails.
+ */
+static int test_ABC(void *arg)
+{
+	struct i915_sw_fence *A, *B, *C;
+	int ret;
+
+	/* Test a chain of fences, A waits on B who waits on C */
+	A = alloc_fence();
+	if (!A)
+		return -ENOMEM;
+
+	B = alloc_fence();
+	if (!B) {
+		ret = -ENOMEM;
+		goto err_A;
+	}
+
+	C = alloc_fence();
+	if (!C) {
+		ret = -ENOMEM;
+		goto err_B;
+	}
+
+	ret = i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL);
+	if (ret < 0)
+		goto err_C;
+	if (ret == 0) {
+		pr_err("Incorrectly reported fence B was complete before await\n");
+		/* ret is 0 here; without this the failure would read as a pass */
+		ret = -EINVAL;
+		goto err_C;
+	}
+
+	ret = i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL);
+	if (ret < 0)
+		goto err_C;
+	if (ret == 0) {
+		pr_err("Incorrectly reported fence C was complete before await\n");
+		/* ret is 0 here; without this the failure would read as a pass */
+		ret = -EINVAL;
+		goto err_C;
+	}
+
+	ret = -EINVAL;
+	i915_sw_fence_commit(A);
+	if (i915_sw_fence_done(A)) {
+		pr_err("Fence A completed early\n");
+		goto err_C;
+	}
+
+	i915_sw_fence_commit(B);
+	if (i915_sw_fence_done(B)) {
+		pr_err("Fence B completed early\n");
+		goto err_C;
+	}
+
+	if (i915_sw_fence_done(A)) {
+		pr_err("Fence A completed early (after signaling B)\n");
+		goto err_C;
+	}
+
+	i915_sw_fence_commit(C);
+
+	ret = 0;
+	if (!i915_sw_fence_done(C)) {
+		pr_err("Fence C not done\n");
+		ret = -EINVAL;
+	}
+	if (!i915_sw_fence_done(B)) {
+		pr_err("Fence B not done\n");
+		ret = -EINVAL;
+	}
+	if (!i915_sw_fence_done(A)) {
+		pr_err("Fence A not done\n");
+		ret = -EINVAL;
+	}
+err_C:
+	free_fence(C);
+err_B:
+	free_fence(B);
+err_A:
+	free_fence(A);
+	return ret;
+}
+
+/*
+ * Subtest: two fences, A and B, both wait on the single event C.
+ *
+ * Neither A nor B may be reported done after their own commits while C
+ * is still pending; committing C must then complete all three.
+ *
+ * @arg is unused. Returns 0 on success, -ENOMEM if a fence cannot be
+ * allocated, a negative error from an await, or -EINVAL if a check fails.
+ */
+static int test_AB_C(void *arg)
+{
+	struct i915_sw_fence *A, *B, *C;
+	int ret = -EINVAL;
+
+	/* Test multiple fences (AB) waiting on a single event (C) */
+	A = alloc_fence();
+	if (!A)
+		return -ENOMEM;
+
+	B = alloc_fence();
+	if (!B) {
+		ret = -ENOMEM;
+		goto err_A;
+	}
+
+	C = alloc_fence();
+	if (!C) {
+		/* Check C itself; B was already validated above */
+		ret = -ENOMEM;
+		goto err_B;
+	}
+
+	ret = i915_sw_fence_await_sw_fence_gfp(A, C, GFP_KERNEL);
+	if (ret < 0)
+		goto err_C;
+	if (ret == 0) {
+		ret = -EINVAL;
+		goto err_C;
+	}
+
+	ret = i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL);
+	if (ret < 0)
+		goto err_C;
+	if (ret == 0) {
+		ret = -EINVAL;
+		goto err_C;
+	}
+
+	i915_sw_fence_commit(A);
+	i915_sw_fence_commit(B);
+
+	ret = 0;
+	if (i915_sw_fence_done(A)) {
+		pr_err("Fence A completed early\n");
+		ret = -EINVAL;
+	}
+
+	if (i915_sw_fence_done(B)) {
+		pr_err("Fence B completed early\n");
+		ret = -EINVAL;
+	}
+
+	i915_sw_fence_commit(C);
+	if (!i915_sw_fence_done(C)) {
+		pr_err("Fence C not done\n");
+		ret = -EINVAL;
+	}
+
+	if (!i915_sw_fence_done(B)) {
+		pr_err("Fence B not done\n");
+		ret = -EINVAL;
+	}
+
+	if (!i915_sw_fence_done(A)) {
+		pr_err("Fence A not done\n");
+		ret = -EINVAL;
+	}
+
+err_C:
+	free_fence(C);
+err_B:
+	free_fence(B);
+err_A:
+	free_fence(A);
+	return ret;
+}
+
+/*
+ * Subtest: the single fence C waits on two event sources, A and B.
+ *
+ * C must not be reported done after its own commit while A and B are
+ * still pending; committing A and B must then complete all three.
+ *
+ * @arg is unused. Returns 0 on success, -ENOMEM if a fence cannot be
+ * allocated, a negative error from an await, or -EINVAL if a check fails.
+ */
+static int test_C_AB(void *arg)
+{
+	struct i915_sw_fence *A, *B, *C;
+	int ret;
+
+	/* Test multiple event sources (A,B) for a single fence (C) */
+	A = alloc_fence();
+	if (!A)
+		return -ENOMEM;
+
+	B = alloc_fence();
+	if (!B) {
+		ret = -ENOMEM;
+		goto err_A;
+	}
+
+	C = alloc_fence();
+	if (!C) {
+		/* Check C itself; B was already validated above */
+		ret = -ENOMEM;
+		goto err_B;
+	}
+
+	ret = i915_sw_fence_await_sw_fence_gfp(C, A, GFP_KERNEL);
+	if (ret < 0)
+		goto err_C;
+	if (ret == 0) {
+		ret = -EINVAL;
+		goto err_C;
+	}
+
+	ret = i915_sw_fence_await_sw_fence_gfp(C, B, GFP_KERNEL);
+	if (ret < 0)
+		goto err_C;
+	if (ret == 0) {
+		ret = -EINVAL;
+		goto err_C;
+	}
+
+	ret = 0;
+	i915_sw_fence_commit(C);
+	if (i915_sw_fence_done(C))
+		ret = -EINVAL;
+
+	i915_sw_fence_commit(A);
+	i915_sw_fence_commit(B);
+
+	if (!i915_sw_fence_done(A)) {
+		pr_err("Fence A not done\n");
+		ret = -EINVAL;
+	}
+
+	if (!i915_sw_fence_done(B)) {
+		pr_err("Fence B not done\n");
+		ret = -EINVAL;
+	}
+
+	if (!i915_sw_fence_done(C)) {
+		pr_err("Fence C not done\n");
+		ret = -EINVAL;
+	}
+
+err_C:
+	free_fence(C);
+err_B:
+	free_fence(B);
+err_A:
+	free_fence(A);
+	return ret;
+}
+
+/*
+ * Subtest: build a chain of 4096 fences in which fences[i] waits on
+ * fences[i - 1], then check that nothing past the head is done until
+ * fences[0] is committed, after which every fence must be done.
+ *
+ * @arg is unused. Returns 0 on success, -ENOMEM on allocation failure,
+ * a negative error from an await, or -EINVAL if a check fails.
+ */
+static int test_chain(void *arg)
+{
+	int nfences = 4096;
+	struct i915_sw_fence **fences;
+	int ret, i;
+
+	/* Test a long chain of fences */
+	fences = kmalloc_array(nfences, sizeof(*fences), GFP_KERNEL);
+	if (!fences)
+		return -ENOMEM;
+
+	for (i = 0; i < nfences; i++) {
+		fences[i] = alloc_fence();
+		if (!fences[i]) {
+			/* Trim the count so cleanup frees only what exists */
+			nfences = i;
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		if (i > 0) {
+			ret = i915_sw_fence_await_sw_fence_gfp(fences[i],
+							       fences[i - 1],
+							       GFP_KERNEL);
+			if (ret < 0) {
+				/* fences[i] was allocated, so include it in cleanup */
+				nfences = i + 1;
+				goto err;
+			}
+
+			i915_sw_fence_commit(fences[i]);
+		}
+	}
+
+	ret = 0;
+	/* Visits nfences - 1 down to 1; fences[0] is still uncommitted */
+	for (i = nfences; --i; ) {
+		if (i915_sw_fence_done(fences[i])) {
+			/* Only log the first early completion */
+			if (ret == 0)
+				pr_err("Fence[%d] completed early\n", i);
+			ret = -EINVAL;
+		}
+	}
+	i915_sw_fence_commit(fences[0]);
+	for (i = 0; ret == 0 && i < nfences; i++) {
+		if (!i915_sw_fence_done(fences[i])) {
+			pr_err("Fence[%d] is not done\n", i);
+			ret = -EINVAL;
+		}
+	}
+
+err:
+	for (i = 0; i < nfences; i++)
+		free_fence(fences[i]);
+	kfree(fences);
+	return ret;
+}
+
+/* State shared between test_ipc() and its worker, task_ipc(). */
+struct task_ipc {
+	struct work_struct work;	/* work item that runs task_ipc() */
+	struct completion started;	/* completed by the worker on entry */
+	struct i915_sw_fence *in, *out;	/* worker waits on in, commits out */
+	int value;			/* set to 1 by the worker after waiting on in */
+};
+
+/*
+ * Worker for test_ipc(): announce that it has started, wait on the "in"
+ * fence, store 1 to ipc->value, then commit the "out" fence.
+ */
+static void task_ipc(struct work_struct *work)
+{
+	struct task_ipc *ipc = container_of(work, typeof(*ipc), work);
+
+	complete(&ipc->started);
+
+	i915_sw_fence_wait(ipc->in);
+	smp_store_mb(ipc->value, 1);
+	i915_sw_fence_commit(ipc->out);
+}
+
+/*
+ * Subtest: use a pair of fences to hand-shake with a worker.
+ *
+ * The worker (task_ipc) waits on ipc.in, sets ipc.value and commits
+ * ipc.out. The value must still be 0 before ipc.in is committed and must
+ * be 1 once the wait on ipc.out has returned.
+ *
+ * @arg is unused. Returns 0 on success, -ENOMEM if a fence cannot be
+ * allocated, or -EINVAL if the value is observed in the wrong state.
+ */
+static int test_ipc(void *arg)
+{
+	struct task_ipc ipc;
+	int ret = 0;
+
+	/* Test use of i915_sw_fence as an interprocess signaling mechanism */
+	ipc.in = alloc_fence();
+	if (!ipc.in)
+		return -ENOMEM;
+	ipc.out = alloc_fence();
+	if (!ipc.out) {
+		ret = -ENOMEM;
+		goto err_in;
+	}
+
+	/* use a completion to avoid chicken-and-egg testing */
+	init_completion(&ipc.started);
+
+	ipc.value = 0;
+	/* ipc lives on our stack, so use the on-stack work initialiser */
+	INIT_WORK_ONSTACK(&ipc.work, task_ipc);
+	schedule_work(&ipc.work);
+
+	wait_for_completion(&ipc.started);
+
+	/* Give the worker a chance to (incorrectly) run past its wait */
+	usleep_range(1000, 2000);
+	if (READ_ONCE(ipc.value)) {
+		pr_err("worker updated value before i915_sw_fence was signaled\n");
+		ret = -EINVAL;
+	}
+
+	i915_sw_fence_commit(ipc.in);
+	i915_sw_fence_wait(ipc.out);
+
+	if (!READ_ONCE(ipc.value)) {
+		pr_err("worker signaled i915_sw_fence before value was posted\n");
+		ret = -EINVAL;
+	}
+
+	/* The worker must be finished before its stack storage goes away */
+	flush_work(&ipc.work);
+	destroy_work_on_stack(&ipc.work);
+	free_fence(ipc.out);
+err_in:
+	free_fence(ipc.in);
+	return ret;
+}
+
+/*
+ * Entry point for the i915_sw_fence mock selftests: hands the table of
+ * subtests below to i915_subtests() with no shared argument and returns
+ * its result.
+ */
+int i915_sw_fence_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(test_self),
+		SUBTEST(test_dag),
+		SUBTEST(test_AB),
+		SUBTEST(test_ABC),
+		SUBTEST(test_AB_C),
+		SUBTEST(test_C_AB),
+		SUBTEST(test_chain),
+		SUBTEST(test_ipc),
+	};
+
+	return i915_subtests(tests, NULL);
+}
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [PATCH 10/12] drm/i915/execlists: Reduce lock contention between schedule/submit_request
  2017-05-15 10:51   ` Tvrtko Ursulin
@ 2017-05-15 10:55     ` Chris Wilson
  2017-05-15 11:46       ` Tvrtko Ursulin
  0 siblings, 1 reply; 36+ messages in thread
From: Chris Wilson @ 2017-05-15 10:55 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Mon, May 15, 2017 at 11:51:52AM +0100, Tvrtko Ursulin wrote:
> 
> On 11/05/2017 20:59, Chris Wilson wrote:
> >If we do not require to perform priority bumping, and we haven't yet
> >submitted the request, we can update its priority in situ and skip
> >acquiring the engine locks -- thus avoiding any contention between us
> >and submit/execute.
> >
> >v2: Remove the stack element from the list if we can do the early
> >assignment.
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >---
> > drivers/gpu/drm/i915/intel_lrc.c | 13 +++++++++++++
> > 1 file changed, 13 insertions(+)
> >
> >diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >index 7e41529bd074..3cfb4b145b75 100644
> >--- a/drivers/gpu/drm/i915/intel_lrc.c
> >+++ b/drivers/gpu/drm/i915/intel_lrc.c
> >@@ -767,6 +767,19 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
> > 		list_safe_reset_next(dep, p, dfs_link);
> > 	}
> >
> >+	/* If we didn't need to bump any existing priorities, and we haven't
> >+	 * yet submitted this request (i.e. there is no potential race with
> >+	 * execlists_submit_request()), we can set our own priority and skip
> >+	 * acquiring the engine locks.
> >+	 */
> >+	if (request->priotree.priority == INT_MIN) {
> >+		GEM_BUG_ON(!list_empty(&request->priotree.link));
> >+		request->priotree.priority = prio;
> >+		if (stack.dfs_link.next == stack.dfs_link.prev)
> >+			return;
> >+		__list_del_entry(&stack.dfs_link);
> 
> Hm is early processing the out of queue request with no dependencies
> safe? Recently you fixed a race in this area, which AFAIK was about
> doing that outside the engine lock and then request becoming ready
> in parallel, racing with the FIFO order.

Yes. In this case we know that we have committed the request as we are in
i915_add_request (the only time we may schedule with prio == INT_MIN).
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 08/12] drm/i915: Split execlist priority queue into rbtree + linked list
  2017-05-11 19:59 ` [PATCH 08/12] drm/i915: Split execlist priority queue into rbtree + linked list Chris Wilson
@ 2017-05-15 11:45   ` Tvrtko Ursulin
  2017-05-15 12:26     ` Chris Wilson
  2017-05-15 12:56   ` [PATCH v2] " Chris Wilson
  1 sibling, 1 reply; 36+ messages in thread
From: Tvrtko Ursulin @ 2017-05-15 11:45 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 11/05/2017 20:59, Chris Wilson wrote:
> All the requests at the same priority are executed in FIFO order. They
> do not need to be stored in the rbtree themselves, as they are a simple
> list within a level. If we move the requests at one priority into a list,
> we can then reduce the rbtree to the set of priorities. This should keep
> the height of the rbtree small, as the number of active priorities can not
> exceed the number of active requests and should be typically only a few.
>
> Currently, we have ~2k possible different priority levels, that may
> increase to allow even more fine grained selection. Allocating those in
> advance seems a waste (and may be impossible), so we opt for allocating
> upon first use, and freeing after its requests are depleted. To avoid
> the possibility of an allocation failure causing us to lose a request,
> we preallocate the default priority (0) and bump any request to that
> priority if we fail to allocate it the appropriate plist. Having a
> request (that is ready to run, so not leading to corruption) execute
> out-of-order is better than leaking the request (and its dependency
> tree) entirely.
>
> There should be a benefit to reducing execlists_dequeue() to principally
> using a simple list (and reducing the frequency of both rbtree iteration
> and balancing on erase) but for typical workloads, request coalescing
> should be small enough that we don't notice any change. The main gain is
> from improving PI calls to schedule, and the explicit list within a
> level should make request unwinding simpler (we just need to insert at
> the head of the list rather than the tail and not have to make the
> rbtree search more complicated).
>
> v2: Avoid use-after-free when deleting a depleted priolist
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c        |  11 +-
>  drivers/gpu/drm/i915/i915_gem_request.c    |   4 +-
>  drivers/gpu/drm/i915/i915_gem_request.h    |   2 +-
>  drivers/gpu/drm/i915/i915_guc_submission.c |  50 +++++----
>  drivers/gpu/drm/i915/i915_utils.h          |   9 ++
>  drivers/gpu/drm/i915/intel_lrc.c           | 171 ++++++++++++++++++-----------
>  drivers/gpu/drm/i915/intel_ringbuffer.h    |   7 ++
>  7 files changed, 165 insertions(+), 89 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 9bc86d46a656..6d61a439efd0 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3352,7 +3352,6 @@ static int i915_engine_info(struct seq_file *m, void *unused)
>
>  		if (i915.enable_execlists) {
>  			u32 ptr, read, write;
> -			struct rb_node *rb;
>  			unsigned int idx;
>
>  			seq_printf(m, "\tExeclist status: 0x%08x %08x\n",
> @@ -3396,9 +3395,13 @@ static int i915_engine_info(struct seq_file *m, void *unused)
>  			rcu_read_unlock();
>
>  			spin_lock_irq(&engine->timeline->lock);
> -			for (rb = engine->execlist_first; rb; rb = rb_next(rb)) {
> -				rq = rb_entry(rb, typeof(*rq), priotree.node);
> -				print_request(m, rq, "\t\tQ ");
> +			for (rb = engine->execlist_first; rb; rb = rb_next(rb)){
> +				struct i915_priolist *p =
> +					rb_entry(rb, typeof(*p), node);
> +
> +				list_for_each_entry(rq, &p->requests,
> +						    priotree.link)
> +					print_request(m, rq, "\t\tQ ");
>  			}
>  			spin_unlock_irq(&engine->timeline->lock);
>  		} else if (INTEL_GEN(dev_priv) > 6) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 10361c7e3b37..1ccf2522cdfd 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -159,7 +159,7 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt)
>  {
>  	struct i915_dependency *dep, *next;
>
> -	GEM_BUG_ON(!RB_EMPTY_NODE(&pt->node));
> +	GEM_BUG_ON(!list_empty(&pt->link));
>
>  	/* Everyone we depended upon (the fences we wait to be signaled)
>  	 * should retire before us and remove themselves from our list.
> @@ -185,7 +185,7 @@ i915_priotree_init(struct i915_priotree *pt)
>  {
>  	INIT_LIST_HEAD(&pt->signalers_list);
>  	INIT_LIST_HEAD(&pt->waiters_list);
> -	RB_CLEAR_NODE(&pt->node);
> +	INIT_LIST_HEAD(&pt->link);
>  	pt->priority = INT_MIN;
>  }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 0ecfc5e2d707..8c508bd9088e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -67,7 +67,7 @@ struct i915_dependency {
>  struct i915_priotree {
>  	struct list_head signalers_list; /* those before us, we depend upon */
>  	struct list_head waiters_list; /* those after us, they depend upon us */
> -	struct rb_node node;
> +	struct list_head link;
>  	int priority;
>  #define I915_PRIORITY_MAX 1024
>  #define I915_PRIORITY_NORMAL 0
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index f069c1faf9a9..440bab856cc2 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -674,32 +674,42 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
>
>  	spin_lock_irq(&engine->timeline->lock);
>  	rb = engine->execlist_first;
> +	GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
>  	while (rb) {
> -		struct drm_i915_gem_request *rq =
> -			rb_entry(rb, typeof(*rq), priotree.node);
> -
> -		if (last && rq->ctx != last->ctx) {
> -			if (port != engine->execlist_port)
> -				break;
> -
> -			port_assign(port, last);
> -			port++;
> +		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> +		struct drm_i915_gem_request *rq, *rn;
> +
> +		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
> +			if (last && rq->ctx != last->ctx) {
> +				if (port != engine->execlist_port) {
> +					__list_del_many(&p->requests,
> +							&rq->priotree.link);
> +					goto done;
> +				}
> +
> +				port_assign(port, last);
> +				port++;
> +			}
> +
> +			INIT_LIST_HEAD(&rq->priotree.link);
> +			rq->priotree.priority = INT_MAX;
> +
> +			i915_guc_submit(rq);
> +			trace_i915_gem_request_in(rq, port - engine->execlist_port);
> +			last = rq;
> +			submit = true;
>  		}
>
>  		rb = rb_next(rb);
> -		rb_erase(&rq->priotree.node, &engine->execlist_queue);
> -		RB_CLEAR_NODE(&rq->priotree.node);
> -		rq->priotree.priority = INT_MAX;
> -
> -		i915_guc_submit(rq);
> -		trace_i915_gem_request_in(rq, port - engine->execlist_port);
> -		last = rq;
> -		submit = true;
> +		rb_erase(&p->node, &engine->execlist_queue);
> +		INIT_LIST_HEAD(&p->requests);
> +		if (p->priority != I915_PRIORITY_NORMAL)
> +			kfree(p);
>  	}
> -	if (submit) {
> +done:
> +	engine->execlist_first = rb;
> +	if (submit)
>  		port_assign(port, last);
> -		engine->execlist_first = rb;
> -	}
>  	spin_unlock_irq(&engine->timeline->lock);
>
>  	return submit;
> diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
> index d9df23795f9a..16ecd1ab108d 100644
> --- a/drivers/gpu/drm/i915/i915_utils.h
> +++ b/drivers/gpu/drm/i915/i915_utils.h
> @@ -105,4 +105,13 @@
>  	__idx;								\
>  })
>
> +#include <linux/list.h>
> +
> +static inline void __list_del_many(struct list_head *head,
> +				   struct list_head *first)
> +{
> +	first->prev = head;
> +	WRITE_ONCE(head->next, first);
> +}
> +
>  #endif /* !__I915_UTILS_H */
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 9d789ade1308..849b35796de7 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -436,57 +436,76 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>
>  	spin_lock_irq(&engine->timeline->lock);
>  	rb = engine->execlist_first;
> +	GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
>  	while (rb) {
> -		struct drm_i915_gem_request *cursor =
> -			rb_entry(rb, typeof(*cursor), priotree.node);
> -
> -		/* Can we combine this request with the current port? It has to
> -		 * be the same context/ringbuffer and not have any exceptions
> -		 * (e.g. GVT saying never to combine contexts).
> -		 *
> -		 * If we can combine the requests, we can execute both by
> -		 * updating the RING_TAIL to point to the end of the second
> -		 * request, and so we never need to tell the hardware about
> -		 * the first.
> -		 */
> -		if (last && !can_merge_ctx(cursor->ctx, last->ctx)) {
> -			/* If we are on the second port and cannot combine
> -			 * this request with the last, then we are done.
> -			 */
> -			if (port != engine->execlist_port)
> -				break;
> -
> -			/* If GVT overrides us we only ever submit port[0],
> -			 * leaving port[1] empty. Note that we also have
> -			 * to be careful that we don't queue the same
> -			 * context (even though a different request) to
> -			 * the second port.
> +		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> +		struct drm_i915_gem_request *rq, *rn;
> +
> +		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
> +			/*
> +			 * Can we combine this request with the current port?
> +			 * It has to be the same context/ringbuffer and not
> +			 * have any exceptions (e.g. GVT saying never to
> +			 * combine contexts).
> +			 *
> +			 * If we can combine the requests, we can execute both
> +			 * by updating the RING_TAIL to point to the end of the
> +			 * second request, and so we never need to tell the
> +			 * hardware about the first.
>  			 */
> -			if (ctx_single_port_submission(last->ctx) ||
> -			    ctx_single_port_submission(cursor->ctx))
> -				break;
> +			if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
> +				/*
> +				 * If we are on the second port and cannot
> +				 * combine this request with the last, then we
> +				 * are done.
> +				 */
> +				if (port != engine->execlist_port) {
> +					__list_del_many(&p->requests,
> +							&rq->priotree.link);
> +					goto done;
> +				}
> +
> +				/*
> +				 * If GVT overrides us we only ever submit
> +				 * port[0], leaving port[1] empty. Note that we
> +				 * also have to be careful that we don't queue
> +				 * the same context (even though a different
> +				 * request) to the second port.
> +				 */
> +				if (ctx_single_port_submission(last->ctx) ||
> +				    ctx_single_port_submission(rq->ctx)) {
> +					__list_del_many(&p->requests,
> +							&rq->priotree.link);
> +					goto done;
> +				}
> +
> +				GEM_BUG_ON(last->ctx == rq->ctx);
> +
> +				if (submit)
> +					port_assign(port, last);
> +				port++;
> +			}
>
> -			GEM_BUG_ON(last->ctx == cursor->ctx);
> +			INIT_LIST_HEAD(&rq->priotree.link);
> +			rq->priotree.priority = INT_MAX;
>
> -			if (submit)
> -				port_assign(port, last);
> -			port++;
> +			__i915_gem_request_submit(rq);
> +			trace_i915_gem_request_in(rq,
> +						  port - engine->execlist_port);
> +			last = rq;
> +			submit = true;
>  		}
>
>  		rb = rb_next(rb);
> -		rb_erase(&cursor->priotree.node, &engine->execlist_queue);
> -		RB_CLEAR_NODE(&cursor->priotree.node);
> -		cursor->priotree.priority = INT_MAX;
> -
> -		__i915_gem_request_submit(cursor);
> -		trace_i915_gem_request_in(cursor, port - engine->execlist_port);
> -		last = cursor;
> -		submit = true;
> +		rb_erase(&p->node, &engine->execlist_queue);
> +		INIT_LIST_HEAD(&p->requests);
> +		if (p->priority != I915_PRIORITY_NORMAL)
> +			kfree(p);
>  	}
> -	if (submit) {
> +done:
> +	engine->execlist_first = rb;
> +	if (submit)
>  		port_assign(port, last);
> -		engine->execlist_first = rb;
> -	}
>  	spin_unlock_irq(&engine->timeline->lock);
>
>  	if (submit)
> @@ -610,28 +629,53 @@ static void intel_lrc_irq_handler(unsigned long data)
>  	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
>  }
>
> -static bool insert_request(struct i915_priotree *pt, struct rb_root *root)
> +static bool
> +insert_request(struct intel_engine_cs *engine,
> +	       struct i915_priotree *pt,
> +	       int prio)
>  {
> -	struct rb_node **p, *rb;
> +	struct i915_priolist *p;
> +	struct rb_node **parent, *rb;
>  	bool first = true;
>
> +find_priolist:
>  	/* most positive priority is scheduled first, equal priorities fifo */
>  	rb = NULL;
> -	p = &root->rb_node;
> -	while (*p) {
> -		struct i915_priotree *pos;
> -
> -		rb = *p;
> -		pos = rb_entry(rb, typeof(*pos), node);
> -		if (pt->priority > pos->priority) {
> -			p = &rb->rb_left;
> -		} else {
> -			p = &rb->rb_right;
> +	parent = &engine->execlist_queue.rb_node;
> +	while (*parent) {
> +		rb = *parent;
> +		p = rb_entry(rb, typeof(*p), node);
> +		if (prio > p->priority) {
> +			parent = &rb->rb_left;
> +		} else if (prio < p->priority) {
> +			parent = &rb->rb_right;
>  			first = false;
> +		} else {
> +			list_add_tail(&pt->link, &p->requests);
> +			return false;
> +		}
> +	}
> +
> +	if (prio == I915_PRIORITY_NORMAL) {
> +		p = &engine->default_priolist;
> +	} else {
> +		p = kmalloc(sizeof(*p), GFP_ATOMIC);

I am back to being nervous about this.

I don't see that even with a dedicated slab we can control the number of 
spare objects kept around and also cannot turn off the merging. If only 
there was something like SLAB_NO_MERGE and kmem_cache_preallocate(cache, 
n). With that we would be able to keep some amount of ready objects in 
the cache.

As an alternative, I was wondering how hard it would be to keep the
priority levels around even when their lists become empty, and only prune
them from the shrinker perhaps? Or from the idle worker if we get to some
larger number of priority levels.

Regards,

Tvrtko

> +		/* Convert an allocation failure to a priority bump */
> +		if (unlikely(!p)) {
> +			prio = I915_PRIORITY_NORMAL; /* recurses just once */
> +			goto find_priolist;
>  		}
>  	}
> -	rb_link_node(&pt->node, rb, p);
> -	rb_insert_color(&pt->node, root);
> +
> +	p->priority = prio;
> +	rb_link_node(&p->node, rb, parent);
> +	rb_insert_color(&p->node, &engine->execlist_queue);
> +
> +	INIT_LIST_HEAD(&p->requests);
> +	list_add_tail(&pt->link, &p->requests);
> +
> +	if (first)
> +		engine->execlist_first = &p->node;
>
>  	return first;
>  }
> @@ -644,12 +688,16 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
>  	/* Will be called from irq-context when using foreign fences. */
>  	spin_lock_irqsave(&engine->timeline->lock, flags);
>
> -	if (insert_request(&request->priotree, &engine->execlist_queue)) {
> -		engine->execlist_first = &request->priotree.node;
> +	if (insert_request(engine,
> +			   &request->priotree,
> +			   request->priotree.priority)) {
>  		if (execlists_elsp_ready(engine))
>  			tasklet_hi_schedule(&engine->irq_tasklet);
>  	}
>
> +	GEM_BUG_ON(!engine->execlist_first);
> +	GEM_BUG_ON(list_empty(&request->priotree.link));
> +
>  	spin_unlock_irqrestore(&engine->timeline->lock, flags);
>  }
>
> @@ -734,10 +782,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
>  			continue;
>
>  		pt->priority = prio;
> -		if (!RB_EMPTY_NODE(&pt->node)) {
> -			rb_erase(&pt->node, &engine->execlist_queue);
> -			if (insert_request(pt, &engine->execlist_queue))
> -				engine->execlist_first = &pt->node;
> +		if (!list_empty(&pt->link)) {
> +			__list_del_entry(&pt->link);
> +			insert_request(engine, pt, prio);
>  		}
>  	}
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index c380f3ac8a39..0ca6f5c3f9a7 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -177,6 +177,12 @@ enum intel_engine_id {
>  	VECS
>  };
>
> +struct i915_priolist {
> +	struct rb_node node;
> +	struct list_head requests;
> +	int priority;
> +};
> +
>  #define INTEL_ENGINE_CS_MAX_NAME 8
>
>  struct intel_engine_cs {
> @@ -367,6 +373,7 @@ struct intel_engine_cs {
>
>  	/* Execlists */
>  	struct tasklet_struct irq_tasklet;
> +	struct i915_priolist default_priolist;
>  	struct execlist_port {
>  		struct drm_i915_gem_request *request_count;
>  #define EXECLIST_COUNT_BITS 2
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 10/12] drm/i915/execlists: Reduce lock contention between schedule/submit_request
  2017-05-15 10:55     ` Chris Wilson
@ 2017-05-15 11:46       ` Tvrtko Ursulin
  0 siblings, 0 replies; 36+ messages in thread
From: Tvrtko Ursulin @ 2017-05-15 11:46 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 15/05/2017 11:55, Chris Wilson wrote:
> On Mon, May 15, 2017 at 11:51:52AM +0100, Tvrtko Ursulin wrote:
>>
>> On 11/05/2017 20:59, Chris Wilson wrote:
>>> If we do not require to perform priority bumping, and we haven't yet
>>> submitted the request, we can update its priority in situ and skip
>>> acquiring the engine locks -- thus avoiding any contention between us
>>> and submit/execute.
>>>
>>> v2: Remove the stack element from the list if we can do the early
>>> assignment.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>> drivers/gpu/drm/i915/intel_lrc.c | 13 +++++++++++++
>>> 1 file changed, 13 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>> index 7e41529bd074..3cfb4b145b75 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -767,6 +767,19 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
>>> 		list_safe_reset_next(dep, p, dfs_link);
>>> 	}
>>>
>>> +	/* If we didn't need to bump any existing priorities, and we haven't
>>> +	 * yet submitted this request (i.e. there is no potential race with
>>> +	 * execlists_submit_request()), we can set our own priority and skip
>>> +	 * acquiring the engine locks.
>>> +	 */
>>> +	if (request->priotree.priority == INT_MIN) {
>>> +		GEM_BUG_ON(!list_empty(&request->priotree.link));
>>> +		request->priotree.priority = prio;
>>> +		if (stack.dfs_link.next == stack.dfs_link.prev)
>>> +			return;
>>> +		__list_del_entry(&stack.dfs_link);
>>
>> Hm is early processing the out of queue request with no dependencies
>> safe? Recently you fixed a race in this area, which AFAIK was about
>> doing that outside the engine lock and then request becoming ready
>> in parallel, racing with the FIFO order.
>
> Yes. In this case we know that we have committed the request as we are in
> i915_add_request (the only time we may schedule with prio == INT_MIN).

Okay, sounds safe in that case.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [01/12] drm/i915: Remove kref from i915_sw_fence (rev2)
  2017-05-11 19:59 [PATCH 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (13 preceding siblings ...)
  2017-05-12 12:13 ` Mika Kuoppala
@ 2017-05-15 11:47 ` Patchwork
  2017-05-15 14:01 ` ✓ Fi.CI.BAT: success for series starting with [01/12] drm/i915: Remove kref from i915_sw_fence (rev4) Patchwork
  15 siblings, 0 replies; 36+ messages in thread
From: Patchwork @ 2017-05-15 11:47 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/12] drm/i915: Remove kref from i915_sw_fence (rev2)
URL   : https://patchwork.freedesktop.org/series/24314/
State : success

== Summary ==

Series 24314v2 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/24314/revisions/2/mbox/

Test gem_exec_flush:
        Subgroup basic-batch-kernel-default-uc:
                fail       -> PASS       (fi-snb-2600) fdo#100007
Test kms_busy:
        Subgroup basic-flip-default-c:
                incomplete -> PASS       (fi-bxt-j4205) fdo#101047

fdo#100007 https://bugs.freedesktop.org/show_bug.cgi?id=100007
fdo#101047 https://bugs.freedesktop.org/show_bug.cgi?id=101047

fi-bdw-gvtdvm    total:278  pass:256  dwarn:8   dfail:0   fail:0   skip:14  time:434s
fi-bxt-j4205     total:278  pass:259  dwarn:0   dfail:0   fail:0   skip:19  time:514s
fi-byt-j1900     total:278  pass:254  dwarn:0   dfail:0   fail:0   skip:24  time:494s
fi-byt-n2820     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time:486s
fi-hsw-4770      total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time:424s
fi-ilk-650       total:278  pass:228  dwarn:0   dfail:0   fail:0   skip:50  time:419s
fi-ivb-3520m     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time:502s
fi-kbl-7500u     total:278  pass:255  dwarn:5   dfail:0   fail:0   skip:18  time:465s
fi-kbl-7560u     total:278  pass:263  dwarn:5   dfail:0   fail:0   skip:10  time:577s
fi-skl-6700hq    total:278  pass:261  dwarn:0   dfail:0   fail:0   skip:17  time:585s
fi-skl-6700k     total:278  pass:256  dwarn:4   dfail:0   fail:0   skip:18  time:462s
fi-skl-6770hq    total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time:501s
fi-skl-gvtdvm    total:278  pass:265  dwarn:0   dfail:0   fail:0   skip:13  time:437s
fi-snb-2520m     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time:534s
fi-snb-2600      total:278  pass:249  dwarn:0   dfail:0   fail:0   skip:29  time:419s

d79d2bb351aa06bc5b39c070068fea112cfa29ef drm-tip: 2017y-05m-15d-08h-55m-10s UTC integration manifest
d672f69 drm/i915: Don't force serialisation on marking up execlists irq posted
991ccb2 drm/i915: Stop inlining the execlists IRQ handler
928505c drm/i915/execlists: Reduce lock contention between schedule/submit_request
a505352 drm/i915: Create a kmem_cache to allocate struct i915_priolist from
cd7be6a drm/i915: Split execlist priority queue into rbtree + linked list
6a30067 drm/i915: Use a define for the default priority [0]
57cd7d6 drm/i915: Don't mark an execlists context-switch when idle
dd5520e drm/i915/execlists: Pack the count into the low bits of the port.request
9e93160 drm/i915: Redefine ptr_pack_bits() and friends
a7cd62b drm/i915: Make ptr_unpack_bits() more function-like
fd2ec81 drm/i915: Import the kfence selftests for i915_sw_fence
934c017 drm/i915: Remove kref from i915_sw_fence

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_4695/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 08/12] drm/i915: Split execlist priority queue into rbtree + linked list
  2017-05-15 11:45   ` Tvrtko Ursulin
@ 2017-05-15 12:26     ` Chris Wilson
  0 siblings, 0 replies; 36+ messages in thread
From: Chris Wilson @ 2017-05-15 12:26 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Mon, May 15, 2017 at 12:45:16PM +0100, Tvrtko Ursulin wrote:
> 
> On 11/05/2017 20:59, Chris Wilson wrote:
> >+	if (prio == I915_PRIORITY_NORMAL) {
> >+		p = &engine->default_priolist;
> >+	} else {
> >+		p = kmalloc(sizeof(*p), GFP_ATOMIC);
> 
> I am back to being nervous about this.
> 
> I don't see that even with a dedicated slab we can control the
> number of spare objects kept around and also cannot turn off the
> merging. If only there was something like SLAB_NO_MERGE and
> kmem_cache_preallocate(cache, n). With that we would be able to keep
> some amount of ready objects in the cache.
> 
> As an alternative I was thinking about how hard would it be to keep
> the priority levels around even when lists become empty? And only
> prune from the shrinker perhaps? Or from the idle worker if we get
> to some larger number of priority levels.

Having a number preallocated, or keeping a freelist of priolists (for
which I'd prefer using the slab cache rather than writing our own),
cannot guarantee that this allocation will succeed. I'm still
contemplating that we may use a full u32 range, and then we are back to
just using an rbtree inside the priotree instead of levels.

Reducing to a list inside a level is still a very useful simplification
for unwind that I think we want to persevere with. As for rendering
out-of-order following an oom, I'm not that concerned about it -- we may
hang the gpu in the middle of a swap storm, worst case.

Ok, Michal is right. If we permanently fall back to 0 after a failure,
ordering is correct. We can then recover to multiple levels on idle.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH v2] drm/i915: Split execlist priority queue into rbtree + linked list
  2017-05-11 19:59 ` [PATCH 08/12] drm/i915: Split execlist priority queue into rbtree + linked list Chris Wilson
  2017-05-15 11:45   ` Tvrtko Ursulin
@ 2017-05-15 12:56   ` Chris Wilson
  2017-05-15 13:27     ` Chris Wilson
  2017-05-15 13:29     ` [PATCH v3] " Chris Wilson
  1 sibling, 2 replies; 36+ messages in thread
From: Chris Wilson @ 2017-05-15 12:56 UTC (permalink / raw)
  To: intel-gfx

All the requests at the same priority are executed in FIFO order. They
do not need to be stored in the rbtree themselves, as they are a simple
list within a level. If we move the requests at one priority into a list,
we can then reduce the rbtree to the set of priorities. This should keep
the height of the rbtree small, as the number of active priorities can not
exceed the number of active requests and should be typically only a few.

Currently, we have ~2k possible different priority levels, that may
increase to allow even more fine grained selection. Allocating those in
advance seems a waste (and may be impossible), so we opt for allocating
upon first use, and freeing after its requests are depleted. To avoid
the possibility of an allocation failure causing us to lose a request,
we preallocate the default priority (0) and bump any request to that
priority if we fail to allocate the appropriate priolist for it. Having a
request (that is ready to run, so not leading to corruption) execute
out-of-order is better than leaking the request (and its dependency
tree) entirely.

There should be a benefit to reducing execlists_dequeue() to principally
using a simple list (and reducing the frequency of both rbtree iteration
and balancing on erase) but for typical workloads, request coalescing
should be small enough that we don't notice any change. The main gain is
from improving PI calls to schedule, and the explicit list within a
level should make request unwinding simpler (we just need to insert at
the head of the list rather than the tail and not have to make the
rbtree search more complicated).

v2: Avoid use-after-free when deleting a depleted priolist

v3: Michał found the solution to handling the allocation failure
gracefully. If we disable all priority scheduling following the
allocation failure, those requests will be executed in fifo and we will
ensure that this request and its dependencies are in strict fifo (even
when it doesn't realise it is only a single list). Normal scheduling is
restored once we know the device is idle, until the next failure!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  11 +-
 drivers/gpu/drm/i915/i915_gem.c            |   7 +-
 drivers/gpu/drm/i915/i915_gem_request.c    |   4 +-
 drivers/gpu/drm/i915/i915_gem_request.h    |   2 +-
 drivers/gpu/drm/i915/i915_guc_submission.c |  50 ++++----
 drivers/gpu/drm/i915/i915_utils.h          |   9 ++
 drivers/gpu/drm/i915/intel_engine_cs.c     |  12 ++
 drivers/gpu/drm/i915/intel_lrc.c           | 184 +++++++++++++++++++----------
 drivers/gpu/drm/i915/intel_ringbuffer.h    |   9 ++
 9 files changed, 193 insertions(+), 95 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index f13b5b895a2b..553f016efe64 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3352,7 +3352,6 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 
 		if (i915.enable_execlists) {
 			u32 ptr, read, write;
-			struct rb_node *rb;
 			unsigned int idx;
 
 			seq_printf(m, "\tExeclist status: 0x%08x %08x\n",
@@ -3396,9 +3395,13 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 			rcu_read_unlock();
 
 			spin_lock_irq(&engine->timeline->lock);
-			for (rb = engine->execlist_first; rb; rb = rb_next(rb)) {
-				rq = rb_entry(rb, typeof(*rq), priotree.node);
-				print_request(m, rq, "\t\tQ ");
+			for (rb = engine->execlist_first; rb; rb = rb_next(rb)){
+				struct i915_priolist *p =
+					rb_entry(rb, typeof(*p), node);
+
+				list_for_each_entry(rq, &p->requests,
+						    priotree.link)
+					print_request(m, rq, "\t\tQ ");
 			}
 			spin_unlock_irq(&engine->timeline->lock);
 		} else if (INTEL_GEN(dev_priv) > 6) {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 83518cdb19f3..07db08cc6be0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3155,8 +3155,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	struct drm_i915_private *dev_priv =
 		container_of(work, typeof(*dev_priv), gt.idle_work.work);
 	struct drm_device *dev = &dev_priv->drm;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	bool rearm_hangcheck;
 
 	if (!READ_ONCE(dev_priv->gt.awake))
@@ -3194,10 +3192,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	if (wait_for(intel_engines_are_idle(dev_priv), 10))
 		DRM_ERROR("Timeout waiting for engines to idle\n");
 
-	for_each_engine(engine, dev_priv, id) {
-		intel_engine_disarm_breadcrumbs(engine);
-		i915_gem_batch_pool_fini(&engine->batch_pool);
-	}
+	intel_engines_mark_idle(dev_priv);
 	i915_gem_timelines_mark_idle(dev_priv);
 
 	GEM_BUG_ON(!dev_priv->gt.awake);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 10361c7e3b37..1ccf2522cdfd 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -159,7 +159,7 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt)
 {
 	struct i915_dependency *dep, *next;
 
-	GEM_BUG_ON(!RB_EMPTY_NODE(&pt->node));
+	GEM_BUG_ON(!list_empty(&pt->link));
 
 	/* Everyone we depended upon (the fences we wait to be signaled)
 	 * should retire before us and remove themselves from our list.
@@ -185,7 +185,7 @@ i915_priotree_init(struct i915_priotree *pt)
 {
 	INIT_LIST_HEAD(&pt->signalers_list);
 	INIT_LIST_HEAD(&pt->waiters_list);
-	RB_CLEAR_NODE(&pt->node);
+	INIT_LIST_HEAD(&pt->link);
 	pt->priority = INT_MIN;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 4c8fb6efc0f1..7b7c84369d78 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -67,7 +67,7 @@ struct i915_dependency {
 struct i915_priotree {
 	struct list_head signalers_list; /* those before us, we depend upon */
 	struct list_head waiters_list; /* those after us, they depend upon us */
-	struct rb_node node;
+	struct list_head link;
 	int priority;
 #define I915_PRIORITY_MAX 1024
 #define I915_PRIORITY_NORMAL 0
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index f069c1faf9a9..440bab856cc2 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -674,32 +674,42 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 
 	spin_lock_irq(&engine->timeline->lock);
 	rb = engine->execlist_first;
+	GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
 	while (rb) {
-		struct drm_i915_gem_request *rq =
-			rb_entry(rb, typeof(*rq), priotree.node);
-
-		if (last && rq->ctx != last->ctx) {
-			if (port != engine->execlist_port)
-				break;
-
-			port_assign(port, last);
-			port++;
+		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+		struct drm_i915_gem_request *rq, *rn;
+
+		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+			if (last && rq->ctx != last->ctx) {
+				if (port != engine->execlist_port) {
+					__list_del_many(&p->requests,
+							&rq->priotree.link);
+					goto done;
+				}
+
+				port_assign(port, last);
+				port++;
+			}
+
+			INIT_LIST_HEAD(&rq->priotree.link);
+			rq->priotree.priority = INT_MAX;
+
+			i915_guc_submit(rq);
+			trace_i915_gem_request_in(rq, port - engine->execlist_port);
+			last = rq;
+			submit = true;
 		}
 
 		rb = rb_next(rb);
-		rb_erase(&rq->priotree.node, &engine->execlist_queue);
-		RB_CLEAR_NODE(&rq->priotree.node);
-		rq->priotree.priority = INT_MAX;
-
-		i915_guc_submit(rq);
-		trace_i915_gem_request_in(rq, port - engine->execlist_port);
-		last = rq;
-		submit = true;
+		rb_erase(&p->node, &engine->execlist_queue);
+		INIT_LIST_HEAD(&p->requests);
+		if (p->priority != I915_PRIORITY_NORMAL)
+			kfree(p);
 	}
-	if (submit) {
+done:
+	engine->execlist_first = rb;
+	if (submit)
 		port_assign(port, last);
-		engine->execlist_first = rb;
-	}
 	spin_unlock_irq(&engine->timeline->lock);
 
 	return submit;
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index d9df23795f9a..16ecd1ab108d 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -105,4 +105,13 @@
 	__idx;								\
 })
 
+#include <linux/list.h>
+
+static inline void __list_del_many(struct list_head *head,
+				   struct list_head *first)
+{
+	first->prev = head;
+	WRITE_ONCE(head->next, first);
+}
+
 #endif /* !__I915_UTILS_H */
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index c89f0e2030da..ea13fa2128cb 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1272,6 +1272,18 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
 		engine->set_default_submission(engine);
 }
 
+void intel_engines_mark_idle(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, i915, id) {
+		intel_engine_disarm_breadcrumbs(engine);
+		i915_gem_batch_pool_fini(&engine->batch_pool);
+		engine->no_priolist = false;
+	}
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_engine.c"
 #endif
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d7cb5517f596..df5ac90a0605 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -436,57 +436,76 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 	spin_lock_irq(&engine->timeline->lock);
 	rb = engine->execlist_first;
+	GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
 	while (rb) {
-		struct drm_i915_gem_request *cursor =
-			rb_entry(rb, typeof(*cursor), priotree.node);
-
-		/* Can we combine this request with the current port? It has to
-		 * be the same context/ringbuffer and not have any exceptions
-		 * (e.g. GVT saying never to combine contexts).
-		 *
-		 * If we can combine the requests, we can execute both by
-		 * updating the RING_TAIL to point to the end of the second
-		 * request, and so we never need to tell the hardware about
-		 * the first.
-		 */
-		if (last && !can_merge_ctx(cursor->ctx, last->ctx)) {
-			/* If we are on the second port and cannot combine
-			 * this request with the last, then we are done.
-			 */
-			if (port != engine->execlist_port)
-				break;
-
-			/* If GVT overrides us we only ever submit port[0],
-			 * leaving port[1] empty. Note that we also have
-			 * to be careful that we don't queue the same
-			 * context (even though a different request) to
-			 * the second port.
+		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+		struct drm_i915_gem_request *rq, *rn;
+
+		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+			/*
+			 * Can we combine this request with the current port?
+			 * It has to be the same context/ringbuffer and not
+			 * have any exceptions (e.g. GVT saying never to
+			 * combine contexts).
+			 *
+			 * If we can combine the requests, we can execute both
+			 * by updating the RING_TAIL to point to the end of the
+			 * second request, and so we never need to tell the
+			 * hardware about the first.
 			 */
-			if (ctx_single_port_submission(last->ctx) ||
-			    ctx_single_port_submission(cursor->ctx))
-				break;
+			if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
+				/*
+				 * If we are on the second port and cannot
+				 * combine this request with the last, then we
+				 * are done.
+				 */
+				if (port != engine->execlist_port) {
+					__list_del_many(&p->requests,
+							&rq->priotree.link);
+					goto done;
+				}
+
+				/*
+				 * If GVT overrides us we only ever submit
+				 * port[0], leaving port[1] empty. Note that we
+				 * also have to be careful that we don't queue
+				 * the same context (even though a different
+				 * request) to the second port.
+				 */
+				if (ctx_single_port_submission(last->ctx) ||
+				    ctx_single_port_submission(rq->ctx)) {
+					__list_del_many(&p->requests,
+							&rq->priotree.link);
+					goto done;
+				}
+
+				GEM_BUG_ON(last->ctx == rq->ctx);
+
+				if (submit)
+					port_assign(port, last);
+				port++;
+			}
 
-			GEM_BUG_ON(last->ctx == cursor->ctx);
+			INIT_LIST_HEAD(&rq->priotree.link);
+			rq->priotree.priority = INT_MAX;
 
-			if (submit)
-				port_assign(port, last);
-			port++;
+			__i915_gem_request_submit(rq);
+			trace_i915_gem_request_in(rq,
+						  port - engine->execlist_port);
+			last = rq;
+			submit = true;
 		}
 
 		rb = rb_next(rb);
-		rb_erase(&cursor->priotree.node, &engine->execlist_queue);
-		RB_CLEAR_NODE(&cursor->priotree.node);
-		cursor->priotree.priority = INT_MAX;
-
-		__i915_gem_request_submit(cursor);
-		trace_i915_gem_request_in(cursor, port - engine->execlist_port);
-		last = cursor;
-		submit = true;
+		rb_erase(&p->node, &engine->execlist_queue);
+		INIT_LIST_HEAD(&p->requests);
+		if (p->priority != I915_PRIORITY_NORMAL)
+			kfree(p);
 	}
-	if (submit) {
+done:
+	engine->execlist_first = rb;
+	if (submit)
 		port_assign(port, last);
-		engine->execlist_first = rb;
-	}
 	spin_unlock_irq(&engine->timeline->lock);
 
 	if (submit)
@@ -610,28 +629,66 @@ static void intel_lrc_irq_handler(unsigned long data)
 	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
 }
 
-static bool insert_request(struct i915_priotree *pt, struct rb_root *root)
+static bool
+insert_request(struct intel_engine_cs *engine,
+	       struct i915_priotree *pt,
+	       int prio)
 {
-	struct rb_node **p, *rb;
+	struct i915_priolist *p;
+	struct rb_node **parent, *rb;
 	bool first = true;
 
+	if (unlikely(!engine->no_priolist))
+		prio = I915_PRIORITY_NORMAL;
+
+find_priolist:
 	/* most positive priority is scheduled first, equal priorities fifo */
 	rb = NULL;
-	p = &root->rb_node;
-	while (*p) {
-		struct i915_priotree *pos;
-
-		rb = *p;
-		pos = rb_entry(rb, typeof(*pos), node);
-		if (pt->priority > pos->priority) {
-			p = &rb->rb_left;
-		} else {
-			p = &rb->rb_right;
+	parent = &engine->execlist_queue.rb_node;
+	while (*parent) {
+		rb = *parent;
+		p = rb_entry(rb, typeof(*p), node);
+		if (prio > p->priority) {
+			parent = &rb->rb_left;
+		} else if (prio < p->priority) {
+			parent = &rb->rb_right;
 			first = false;
+		} else {
+			list_add_tail(&pt->link, &p->requests);
+			return false;
+		}
+	}
+
+	if (prio == I915_PRIORITY_NORMAL) {
+		p = &engine->default_priolist;
+	} else {
+		p = kmalloc(sizeof(*p), GFP_ATOMIC);
+		/* Convert an allocation failure to a priority bump */
+		if (unlikely(!p)) {
+			prio = I915_PRIORITY_NORMAL; /* recurses just once */
+
+			/* To maintain ordering with all rendering, after an
+			 * allocation failure we have to disable all scheduling.
+			 * Requests will then be executed in fifo, and schedule
+			 * will ensure that dependencies are emitted in fifo.
+			 * There will still be some reordering with existing
+			 * requests, so if userspace lied about their
+			 * dependencies that reordering may be visible.
+			 */
+			engine->no_priolist = true;
+			goto find_priolist;
 		}
 	}
-	rb_link_node(&pt->node, rb, p);
-	rb_insert_color(&pt->node, root);
+
+	p->priority = prio;
+	rb_link_node(&p->node, rb, parent);
+	rb_insert_color(&p->node, &engine->execlist_queue);
+
+	INIT_LIST_HEAD(&p->requests);
+	list_add_tail(&pt->link, &p->requests);
+
+	if (first)
+		engine->execlist_first = &p->node;
 
 	return first;
 }
@@ -644,12 +701,16 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
 	/* Will be called from irq-context when using foreign fences. */
 	spin_lock_irqsave(&engine->timeline->lock, flags);
 
-	if (insert_request(&request->priotree, &engine->execlist_queue)) {
-		engine->execlist_first = &request->priotree.node;
+	if (insert_request(engine,
+			   &request->priotree,
+			   request->priotree.priority)) {
 		if (execlists_elsp_ready(engine))
 			tasklet_hi_schedule(&engine->irq_tasklet);
 	}
 
+	GEM_BUG_ON(!engine->execlist_first);
+	GEM_BUG_ON(list_empty(&request->priotree.link));
+
 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }
 
@@ -734,10 +795,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 			continue;
 
 		pt->priority = prio;
-		if (!RB_EMPTY_NODE(&pt->node)) {
-			rb_erase(&pt->node, &engine->execlist_queue);
-			if (insert_request(pt, &engine->execlist_queue))
-				engine->execlist_first = &pt->node;
+		if (!list_empty(&pt->link)) {
+			__list_del_entry(&pt->link);
+			insert_request(engine, pt, prio);
 		}
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c380f3ac8a39..fab2322664f4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -177,6 +177,12 @@ enum intel_engine_id {
 	VECS
 };
 
+struct i915_priolist {
+	struct rb_node node;
+	struct list_head requests;
+	int priority;
+};
+
 #define INTEL_ENGINE_CS_MAX_NAME 8
 
 struct intel_engine_cs {
@@ -367,6 +373,8 @@ struct intel_engine_cs {
 
 	/* Execlists */
 	struct tasklet_struct irq_tasklet;
+	struct i915_priolist default_priolist;
+	bool no_priolist;
 	struct execlist_port {
 		struct drm_i915_gem_request *request_count;
 #define EXECLIST_COUNT_BITS 2
@@ -722,6 +730,7 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
+void intel_engines_mark_idle(struct drm_i915_private *i915);
 void intel_engines_reset_default_submission(struct drm_i915_private *i915);
 
 #endif /* _INTEL_RINGBUFFER_H_ */
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [PATCH v2] drm/i915: Split execlist priority queue into rbtree + linked list
  2017-05-15 12:56   ` [PATCH v2] " Chris Wilson
@ 2017-05-15 13:27     ` Chris Wilson
  2017-05-15 13:29     ` [PATCH v3] " Chris Wilson
  1 sibling, 0 replies; 36+ messages in thread
From: Chris Wilson @ 2017-05-15 13:27 UTC (permalink / raw)
  To: intel-gfx

On Mon, May 15, 2017 at 01:56:16PM +0100, Chris Wilson wrote:
> -static bool insert_request(struct i915_priotree *pt, struct rb_root *root)
> +static bool
> +insert_request(struct intel_engine_cs *engine,
> +	       struct i915_priotree *pt,
> +	       int prio)
>  {
> -	struct rb_node **p, *rb;
> +	struct i915_priolist *p;
> +	struct rb_node **parent, *rb;
>  	bool first = true;
>  
> +	if (unlikely(!engine->no_priolist))

Inverted. Sheesh.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH v3] drm/i915: Split execlist priority queue into rbtree + linked list
  2017-05-15 12:56   ` [PATCH v2] " Chris Wilson
  2017-05-15 13:27     ` Chris Wilson
@ 2017-05-15 13:29     ` Chris Wilson
  2017-05-15 14:51       ` Michał Winiarski
  2017-05-16  7:55       ` Tvrtko Ursulin
  1 sibling, 2 replies; 36+ messages in thread
From: Chris Wilson @ 2017-05-15 13:29 UTC (permalink / raw)
  To: intel-gfx

All the requests at the same priority are executed in FIFO order. They
do not need to be stored in the rbtree themselves, as they are a simple
list within a level. If we move the requests at one priority into a list,
we can then reduce the rbtree to the set of priorities. This should keep
the height of the rbtree small, as the number of active priorities cannot
exceed the number of active requests and should typically be only a few.

Currently, we have ~2k possible different priority levels, and that may
increase to allow even finer-grained selection. Allocating those in
advance seems a waste (and may be impossible), so we opt for allocating
each level upon first use, and freeing it after its requests are
depleted. To avoid the possibility of an allocation failure causing us
to lose a request, we preallocate the default priority (0) and bump any
request to that priority if we fail to allocate the appropriate priolist
for it. Having a request (that is ready to run, so not leading to
corruption) execute out-of-order is better than leaking the request (and
its dependency tree) entirely.

There should be a benefit to reducing execlists_dequeue() to principally
using a simple list (and reducing the frequency of both rbtree iteration
and balancing on erase) but for typical workloads, request coalescing
should be small enough that we don't notice any change. The main gain is
from improving PI calls to schedule, and the explicit list within a
level should make request unwinding simpler (we just need to insert at
the head of the list rather than the tail and not have to make the
rbtree search more complicated).

v2: Avoid use-after-free when deleting a depleted priolist

v3: Michał found the solution to handling the allocation failure
gracefully. If we disable all priority scheduling following the
allocation failure, all subsequent requests will be executed in fifo, and
so we ensure that this request and its dependencies are in strict fifo
(even when the scheduler doesn't realise there is only a single list).
Normal scheduling is restored once we know the device is idle, until the
next failure!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  11 +-
 drivers/gpu/drm/i915/i915_gem.c            |   7 +-
 drivers/gpu/drm/i915/i915_gem_request.c    |   4 +-
 drivers/gpu/drm/i915/i915_gem_request.h    |   2 +-
 drivers/gpu/drm/i915/i915_guc_submission.c |  50 ++++----
 drivers/gpu/drm/i915/i915_utils.h          |   9 ++
 drivers/gpu/drm/i915/intel_engine_cs.c     |  12 ++
 drivers/gpu/drm/i915/intel_lrc.c           | 184 +++++++++++++++++++----------
 drivers/gpu/drm/i915/intel_ringbuffer.h    |   9 ++
 9 files changed, 193 insertions(+), 95 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index f13b5b895a2b..553f016efe64 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3352,7 +3352,6 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 
 		if (i915.enable_execlists) {
 			u32 ptr, read, write;
-			struct rb_node *rb;
 			unsigned int idx;
 
 			seq_printf(m, "\tExeclist status: 0x%08x %08x\n",
@@ -3396,9 +3395,13 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 			rcu_read_unlock();
 
 			spin_lock_irq(&engine->timeline->lock);
-			for (rb = engine->execlist_first; rb; rb = rb_next(rb)) {
-				rq = rb_entry(rb, typeof(*rq), priotree.node);
-				print_request(m, rq, "\t\tQ ");
+			for (rb = engine->execlist_first; rb; rb = rb_next(rb)){
+				struct i915_priolist *p =
+					rb_entry(rb, typeof(*p), node);
+
+				list_for_each_entry(rq, &p->requests,
+						    priotree.link)
+					print_request(m, rq, "\t\tQ ");
 			}
 			spin_unlock_irq(&engine->timeline->lock);
 		} else if (INTEL_GEN(dev_priv) > 6) {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 83518cdb19f3..07db08cc6be0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3155,8 +3155,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	struct drm_i915_private *dev_priv =
 		container_of(work, typeof(*dev_priv), gt.idle_work.work);
 	struct drm_device *dev = &dev_priv->drm;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	bool rearm_hangcheck;
 
 	if (!READ_ONCE(dev_priv->gt.awake))
@@ -3194,10 +3192,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	if (wait_for(intel_engines_are_idle(dev_priv), 10))
 		DRM_ERROR("Timeout waiting for engines to idle\n");
 
-	for_each_engine(engine, dev_priv, id) {
-		intel_engine_disarm_breadcrumbs(engine);
-		i915_gem_batch_pool_fini(&engine->batch_pool);
-	}
+	intel_engines_mark_idle(dev_priv);
 	i915_gem_timelines_mark_idle(dev_priv);
 
 	GEM_BUG_ON(!dev_priv->gt.awake);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 10361c7e3b37..1ccf2522cdfd 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -159,7 +159,7 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt)
 {
 	struct i915_dependency *dep, *next;
 
-	GEM_BUG_ON(!RB_EMPTY_NODE(&pt->node));
+	GEM_BUG_ON(!list_empty(&pt->link));
 
 	/* Everyone we depended upon (the fences we wait to be signaled)
 	 * should retire before us and remove themselves from our list.
@@ -185,7 +185,7 @@ i915_priotree_init(struct i915_priotree *pt)
 {
 	INIT_LIST_HEAD(&pt->signalers_list);
 	INIT_LIST_HEAD(&pt->waiters_list);
-	RB_CLEAR_NODE(&pt->node);
+	INIT_LIST_HEAD(&pt->link);
 	pt->priority = INT_MIN;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 4c8fb6efc0f1..7b7c84369d78 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -67,7 +67,7 @@ struct i915_dependency {
 struct i915_priotree {
 	struct list_head signalers_list; /* those before us, we depend upon */
 	struct list_head waiters_list; /* those after us, they depend upon us */
-	struct rb_node node;
+	struct list_head link;
 	int priority;
 #define I915_PRIORITY_MAX 1024
 #define I915_PRIORITY_NORMAL 0
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index f069c1faf9a9..440bab856cc2 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -674,32 +674,42 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 
 	spin_lock_irq(&engine->timeline->lock);
 	rb = engine->execlist_first;
+	GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
 	while (rb) {
-		struct drm_i915_gem_request *rq =
-			rb_entry(rb, typeof(*rq), priotree.node);
-
-		if (last && rq->ctx != last->ctx) {
-			if (port != engine->execlist_port)
-				break;
-
-			port_assign(port, last);
-			port++;
+		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+		struct drm_i915_gem_request *rq, *rn;
+
+		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+			if (last && rq->ctx != last->ctx) {
+				if (port != engine->execlist_port) {
+					__list_del_many(&p->requests,
+							&rq->priotree.link);
+					goto done;
+				}
+
+				port_assign(port, last);
+				port++;
+			}
+
+			INIT_LIST_HEAD(&rq->priotree.link);
+			rq->priotree.priority = INT_MAX;
+
+			i915_guc_submit(rq);
+			trace_i915_gem_request_in(rq, port - engine->execlist_port);
+			last = rq;
+			submit = true;
 		}
 
 		rb = rb_next(rb);
-		rb_erase(&rq->priotree.node, &engine->execlist_queue);
-		RB_CLEAR_NODE(&rq->priotree.node);
-		rq->priotree.priority = INT_MAX;
-
-		i915_guc_submit(rq);
-		trace_i915_gem_request_in(rq, port - engine->execlist_port);
-		last = rq;
-		submit = true;
+		rb_erase(&p->node, &engine->execlist_queue);
+		INIT_LIST_HEAD(&p->requests);
+		if (p->priority != I915_PRIORITY_NORMAL)
+			kfree(p);
 	}
-	if (submit) {
+done:
+	engine->execlist_first = rb;
+	if (submit)
 		port_assign(port, last);
-		engine->execlist_first = rb;
-	}
 	spin_unlock_irq(&engine->timeline->lock);
 
 	return submit;
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index d9df23795f9a..16ecd1ab108d 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -105,4 +105,13 @@
 	__idx;								\
 })
 
+#include <linux/list.h>
+
+static inline void __list_del_many(struct list_head *head,
+				   struct list_head *first)
+{
+	first->prev = head;
+	WRITE_ONCE(head->next, first);
+}
+
 #endif /* !__I915_UTILS_H */
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index c89f0e2030da..ea13fa2128cb 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1272,6 +1272,18 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
 		engine->set_default_submission(engine);
 }
 
+void intel_engines_mark_idle(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, i915, id) {
+		intel_engine_disarm_breadcrumbs(engine);
+		i915_gem_batch_pool_fini(&engine->batch_pool);
+		engine->no_priolist = false;
+	}
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_engine.c"
 #endif
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d7cb5517f596..4ce0929a03a9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -436,57 +436,76 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 	spin_lock_irq(&engine->timeline->lock);
 	rb = engine->execlist_first;
+	GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
 	while (rb) {
-		struct drm_i915_gem_request *cursor =
-			rb_entry(rb, typeof(*cursor), priotree.node);
-
-		/* Can we combine this request with the current port? It has to
-		 * be the same context/ringbuffer and not have any exceptions
-		 * (e.g. GVT saying never to combine contexts).
-		 *
-		 * If we can combine the requests, we can execute both by
-		 * updating the RING_TAIL to point to the end of the second
-		 * request, and so we never need to tell the hardware about
-		 * the first.
-		 */
-		if (last && !can_merge_ctx(cursor->ctx, last->ctx)) {
-			/* If we are on the second port and cannot combine
-			 * this request with the last, then we are done.
-			 */
-			if (port != engine->execlist_port)
-				break;
-
-			/* If GVT overrides us we only ever submit port[0],
-			 * leaving port[1] empty. Note that we also have
-			 * to be careful that we don't queue the same
-			 * context (even though a different request) to
-			 * the second port.
+		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+		struct drm_i915_gem_request *rq, *rn;
+
+		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+			/*
+			 * Can we combine this request with the current port?
+			 * It has to be the same context/ringbuffer and not
+			 * have any exceptions (e.g. GVT saying never to
+			 * combine contexts).
+			 *
+			 * If we can combine the requests, we can execute both
+			 * by updating the RING_TAIL to point to the end of the
+			 * second request, and so we never need to tell the
+			 * hardware about the first.
 			 */
-			if (ctx_single_port_submission(last->ctx) ||
-			    ctx_single_port_submission(cursor->ctx))
-				break;
+			if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
+				/*
+				 * If we are on the second port and cannot
+				 * combine this request with the last, then we
+				 * are done.
+				 */
+				if (port != engine->execlist_port) {
+					__list_del_many(&p->requests,
+							&rq->priotree.link);
+					goto done;
+				}
+
+				/*
+				 * If GVT overrides us we only ever submit
+				 * port[0], leaving port[1] empty. Note that we
+				 * also have to be careful that we don't queue
+				 * the same context (even though a different
+				 * request) to the second port.
+				 */
+				if (ctx_single_port_submission(last->ctx) ||
+				    ctx_single_port_submission(rq->ctx)) {
+					__list_del_many(&p->requests,
+							&rq->priotree.link);
+					goto done;
+				}
+
+				GEM_BUG_ON(last->ctx == rq->ctx);
+
+				if (submit)
+					port_assign(port, last);
+				port++;
+			}
 
-			GEM_BUG_ON(last->ctx == cursor->ctx);
+			INIT_LIST_HEAD(&rq->priotree.link);
+			rq->priotree.priority = INT_MAX;
 
-			if (submit)
-				port_assign(port, last);
-			port++;
+			__i915_gem_request_submit(rq);
+			trace_i915_gem_request_in(rq,
+						  port - engine->execlist_port);
+			last = rq;
+			submit = true;
 		}
 
 		rb = rb_next(rb);
-		rb_erase(&cursor->priotree.node, &engine->execlist_queue);
-		RB_CLEAR_NODE(&cursor->priotree.node);
-		cursor->priotree.priority = INT_MAX;
-
-		__i915_gem_request_submit(cursor);
-		trace_i915_gem_request_in(cursor, port - engine->execlist_port);
-		last = cursor;
-		submit = true;
+		rb_erase(&p->node, &engine->execlist_queue);
+		INIT_LIST_HEAD(&p->requests);
+		if (p->priority != I915_PRIORITY_NORMAL)
+			kfree(p);
 	}
-	if (submit) {
+done:
+	engine->execlist_first = rb;
+	if (submit)
 		port_assign(port, last);
-		engine->execlist_first = rb;
-	}
 	spin_unlock_irq(&engine->timeline->lock);
 
 	if (submit)
@@ -610,28 +629,66 @@ static void intel_lrc_irq_handler(unsigned long data)
 	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
 }
 
-static bool insert_request(struct i915_priotree *pt, struct rb_root *root)
+static bool
+insert_request(struct intel_engine_cs *engine,
+	       struct i915_priotree *pt,
+	       int prio)
 {
-	struct rb_node **p, *rb;
+	struct i915_priolist *p;
+	struct rb_node **parent, *rb;
 	bool first = true;
 
+	if (unlikely(engine->no_priolist))
+		prio = I915_PRIORITY_NORMAL;
+
+find_priolist:
 	/* most positive priority is scheduled first, equal priorities fifo */
 	rb = NULL;
-	p = &root->rb_node;
-	while (*p) {
-		struct i915_priotree *pos;
-
-		rb = *p;
-		pos = rb_entry(rb, typeof(*pos), node);
-		if (pt->priority > pos->priority) {
-			p = &rb->rb_left;
-		} else {
-			p = &rb->rb_right;
+	parent = &engine->execlist_queue.rb_node;
+	while (*parent) {
+		rb = *parent;
+		p = rb_entry(rb, typeof(*p), node);
+		if (prio > p->priority) {
+			parent = &rb->rb_left;
+		} else if (prio < p->priority) {
+			parent = &rb->rb_right;
 			first = false;
+		} else {
+			list_add_tail(&pt->link, &p->requests);
+			return false;
+		}
+	}
+
+	if (prio == I915_PRIORITY_NORMAL) {
+		p = &engine->default_priolist;
+	} else {
+		p = kmalloc(sizeof(*p), GFP_ATOMIC);
+		/* Convert an allocation failure to a priority bump */
+		if (unlikely(!p)) {
+			prio = I915_PRIORITY_NORMAL; /* recurses just once */
+
+			/* To maintain ordering with all rendering, after an
+			 * allocation failure we have to disable all scheduling.
+			 * Requests will then be executed in fifo, and schedule
+			 * will ensure that dependencies are emitted in fifo.
+			 * There will still be some reordering with existing
+			 * requests, so if userspace lied about their
+			 * dependencies that reordering may be visible.
+			 */
+			engine->no_priolist = true;
+			goto find_priolist;
 		}
 	}
-	rb_link_node(&pt->node, rb, p);
-	rb_insert_color(&pt->node, root);
+
+	p->priority = prio;
+	rb_link_node(&p->node, rb, parent);
+	rb_insert_color(&p->node, &engine->execlist_queue);
+
+	INIT_LIST_HEAD(&p->requests);
+	list_add_tail(&pt->link, &p->requests);
+
+	if (first)
+		engine->execlist_first = &p->node;
 
 	return first;
 }
@@ -644,12 +701,16 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
 	/* Will be called from irq-context when using foreign fences. */
 	spin_lock_irqsave(&engine->timeline->lock, flags);
 
-	if (insert_request(&request->priotree, &engine->execlist_queue)) {
-		engine->execlist_first = &request->priotree.node;
+	if (insert_request(engine,
+			   &request->priotree,
+			   request->priotree.priority)) {
 		if (execlists_elsp_ready(engine))
 			tasklet_hi_schedule(&engine->irq_tasklet);
 	}
 
+	GEM_BUG_ON(!engine->execlist_first);
+	GEM_BUG_ON(list_empty(&request->priotree.link));
+
 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }
 
@@ -734,10 +795,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 			continue;
 
 		pt->priority = prio;
-		if (!RB_EMPTY_NODE(&pt->node)) {
-			rb_erase(&pt->node, &engine->execlist_queue);
-			if (insert_request(pt, &engine->execlist_queue))
-				engine->execlist_first = &pt->node;
+		if (!list_empty(&pt->link)) {
+			__list_del_entry(&pt->link);
+			insert_request(engine, pt, prio);
 		}
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c380f3ac8a39..fab2322664f4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -177,6 +177,12 @@ enum intel_engine_id {
 	VECS
 };
 
+struct i915_priolist {
+	struct rb_node node;
+	struct list_head requests;
+	int priority;
+};
+
 #define INTEL_ENGINE_CS_MAX_NAME 8
 
 struct intel_engine_cs {
@@ -367,6 +373,8 @@ struct intel_engine_cs {
 
 	/* Execlists */
 	struct tasklet_struct irq_tasklet;
+	struct i915_priolist default_priolist;
+	bool no_priolist;
 	struct execlist_port {
 		struct drm_i915_gem_request *request_count;
 #define EXECLIST_COUNT_BITS 2
@@ -722,6 +730,7 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
+void intel_engines_mark_idle(struct drm_i915_private *i915);
 void intel_engines_reset_default_submission(struct drm_i915_private *i915);
 
 #endif /* _INTEL_RINGBUFFER_H_ */
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [01/12] drm/i915: Remove kref from i915_sw_fence (rev4)
  2017-05-11 19:59 [PATCH 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (14 preceding siblings ...)
  2017-05-15 11:47 ` ✓ Fi.CI.BAT: success for series starting with [01/12] drm/i915: Remove kref from i915_sw_fence (rev2) Patchwork
@ 2017-05-15 14:01 ` Patchwork
  15 siblings, 0 replies; 36+ messages in thread
From: Patchwork @ 2017-05-15 14:01 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/12] drm/i915: Remove kref from i915_sw_fence (rev4)
URL   : https://patchwork.freedesktop.org/series/24314/
State : success

== Summary ==

Series 24314v4 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/24314/revisions/4/mbox/

Test gem_exec_flush:
        Subgroup basic-batch-kernel-default-uc:
                pass       -> FAIL       (fi-snb-2600) fdo#100007
Test gem_exec_suspend:
        Subgroup basic-s4-devices:
                dmesg-warn -> PASS       (fi-snb-2600) fdo#100125
Test kms_cursor_legacy:
        Subgroup basic-busy-flip-before-cursor-atomic:
                pass       -> FAIL       (fi-snb-2600) fdo#100215

fdo#100007 https://bugs.freedesktop.org/show_bug.cgi?id=100007
fdo#100125 https://bugs.freedesktop.org/show_bug.cgi?id=100125
fdo#100215 https://bugs.freedesktop.org/show_bug.cgi?id=100215

fi-bdw-5557u     total:278  pass:267  dwarn:0   dfail:0   fail:0   skip:11  time:446s
fi-bdw-gvtdvm    total:278  pass:256  dwarn:8   dfail:0   fail:0   skip:14  time:434s
fi-bsw-n3050     total:278  pass:242  dwarn:0   dfail:0   fail:0   skip:36  time:583s
fi-bxt-j4205     total:278  pass:259  dwarn:0   dfail:0   fail:0   skip:19  time:512s
fi-byt-j1900     total:278  pass:254  dwarn:0   dfail:0   fail:0   skip:24  time:492s
fi-byt-n2820     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time:495s
fi-hsw-4770      total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time:420s
fi-hsw-4770r     total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time:407s
fi-ilk-650       total:278  pass:228  dwarn:0   dfail:0   fail:0   skip:50  time:418s
fi-ivb-3520m     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time:501s
fi-ivb-3770      total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time:468s
fi-kbl-7500u     total:278  pass:255  dwarn:5   dfail:0   fail:0   skip:18  time:472s
fi-kbl-7560u     total:278  pass:263  dwarn:5   dfail:0   fail:0   skip:10  time:578s
fi-skl-6260u     total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time:454s
fi-skl-6700hq    total:278  pass:261  dwarn:0   dfail:0   fail:0   skip:17  time:580s
fi-skl-6700k     total:278  pass:256  dwarn:4   dfail:0   fail:0   skip:18  time:471s
fi-skl-6770hq    total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time:501s
fi-skl-gvtdvm    total:278  pass:265  dwarn:0   dfail:0   fail:0   skip:13  time:438s
fi-snb-2520m     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time:540s
fi-snb-2600      total:278  pass:247  dwarn:0   dfail:0   fail:2   skip:29  time:408s
fi-bxt-t5700 failed to collect. IGT log at Patchwork_4696/fi-bxt-t5700/igt.log

57e52b31dc86cf78c27a0da647b09aa0166822c3 drm-tip: 2017y-05m-15d-12h-38m-57s UTC integration manifest
c4d6b8a drm/i915: Don't force serialisation on marking up execlists irq posted
4f76737 drm/i915: Stop inlining the execlists IRQ handler
5bc7039 drm/i915/execlists: Reduce lock contention between schedule/submit_request
4c18d2d drm/i915: Create a kmem_cache to allocate struct i915_priolist from
4b41ebe drm/i915: Split execlist priority queue into rbtree + linked list
bc675b4 drm/i915: Use a define for the default priority [0]
bcd7c59 drm/i915: Don't mark an execlists context-switch when idle
3f8e6c1 drm/i915/execlists: Pack the count into the low bits of the port.request
b4c4e51 drm/i915: Redefine ptr_pack_bits() and friends
eca30379 drm/i915: Make ptr_unpack_bits() more function-like
75e8a67 drm/i915: Import the kfence selftests for i915_sw_fence
5a8dd5b drm/i915: Remove kref from i915_sw_fence

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_4696/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v3] drm/i915: Split execlist priority queue into rbtree + linked list
  2017-05-15 13:29     ` [PATCH v3] " Chris Wilson
@ 2017-05-15 14:51       ` Michał Winiarski
  2017-05-16  7:55       ` Tvrtko Ursulin
  1 sibling, 0 replies; 36+ messages in thread
From: Michał Winiarski @ 2017-05-15 14:51 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Mon, May 15, 2017 at 02:29:50PM +0100, Chris Wilson wrote:
> All the requests at the same priority are executed in FIFO order. They
> do not need to be stored in the rbtree themselves, as they are a simple
> list within a level. If we move the requests at one priority into a list,
> we can then reduce the rbtree to the set of priorities. This should keep
> the height of the rbtree small, as the number of active priorities can not
> exceed the number of active requests and should be typically only a few.
> 
> Currently, we have ~2k possible different priority levels, that may
> increase to allow even more fine grained selection. Allocating those in
> advance seems a waste (and may be impossible), so we opt for allocating
> upon first use, and freeing after its requests are depleted. To avoid
> the possibility of an allocation failure causing us to lose a request,
> we preallocate the default priority (0) and bump any request to that
> priority if we fail to allocate it the appropriate plist. Having a
> request (that is ready to run, so not leading to corruption) execute
> out-of-order is better than leaking the request (and its dependency
> tree) entirely.
> 
> There should be a benefit to reducing execlists_dequeue() to principally
> using a simple list (and reducing the frequency of both rbtree iteration
> and balancing on erase) but for typical workloads, request coalescing
> should be small enough that we don't notice any change. The main gain is
> from improving PI calls to schedule, and the explicit list within a
> level should make request unwinding simpler (we just need to insert at
> the head of the list rather than the tail and not have to make the
> rbtree search more complicated).
> 
> v2: Avoid use-after-free when deleting a depleted priolist
> 
> v3: Michał found the solution to handling the allocation failure
> gracefully. If we disable all priority scheduling following the
> allocation failure, those requests will be executed in fifo and we will
> ensure that this request and its dependencies are in strict fifo (even
> when it doesn't realise it is only a single list). Normal scheduling is
> restored once we know the device is idle, until the next failure!

Actually, this is a result of a conversation with the other Michał W, so:
Suggested-by: Michał Wajdeczko <michal.wajdeczko@intel.com>

also
Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>

Thanks!

-Michał
 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c        |  11 +-
>  drivers/gpu/drm/i915/i915_gem.c            |   7 +-
>  drivers/gpu/drm/i915/i915_gem_request.c    |   4 +-
>  drivers/gpu/drm/i915/i915_gem_request.h    |   2 +-
>  drivers/gpu/drm/i915/i915_guc_submission.c |  50 ++++----
>  drivers/gpu/drm/i915/i915_utils.h          |   9 ++
>  drivers/gpu/drm/i915/intel_engine_cs.c     |  12 ++
>  drivers/gpu/drm/i915/intel_lrc.c           | 184 +++++++++++++++++++----------
>  drivers/gpu/drm/i915/intel_ringbuffer.h    |   9 ++
>  9 files changed, 193 insertions(+), 95 deletions(-)
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v3] drm/i915: Split execlist priority queue into rbtree + linked list
  2017-05-15 13:29     ` [PATCH v3] " Chris Wilson
  2017-05-15 14:51       ` Michał Winiarski
@ 2017-05-16  7:55       ` Tvrtko Ursulin
  1 sibling, 0 replies; 36+ messages in thread
From: Tvrtko Ursulin @ 2017-05-16  7:55 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 15/05/2017 14:29, Chris Wilson wrote:
> All the requests at the same priority are executed in FIFO order. They
> do not need to be stored in the rbtree themselves, as they are a simple
> list within a level. If we move the requests at one priority into a list,
> we can then reduce the rbtree to the set of priorities. This should keep
> the height of the rbtree small, as the number of active priorities can not
> exceed the number of active requests and should be typically only a few.
>
> Currently, we have ~2k possible different priority levels, that may
> increase to allow even more fine grained selection. Allocating those in
> advance seems a waste (and may be impossible), so we opt for allocating
> upon first use, and freeing after its requests are depleted. To avoid
> the possibility of an allocation failure causing us to lose a request,
> we preallocate the default priority (0) and bump any request to that
> priority if we fail to allocate it the appropriate plist. Having a
> request (that is ready to run, so not leading to corruption) execute
> out-of-order is better than leaking the request (and its dependency
> tree) entirely.
>
> There should be a benefit to reducing execlists_dequeue() to principally
> using a simple list (and reducing the frequency of both rbtree iteration
> and balancing on erase) but for typical workloads, request coalescing
> should be small enough that we don't notice any change. The main gain is
> from improving PI calls to schedule, and the explicit list within a
> level should make request unwinding simpler (we just need to insert at
> the head of the list rather than the tail and not have to make the
> rbtree search more complicated).
>
> v2: Avoid use-after-free when deleting a depleted priolist
>
> v3: Michał found the solution to handling the allocation failure
> gracefully. If we disable all priority scheduling following the
> allocation failure, those requests will be executed in fifo and we will
> ensure that this request and its dependencies are in strict fifo (even
> when it doesn't realise it is only a single list). Normal scheduling is
> restored once we know the device is idle, until the next failure!
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c        |  11 +-
>  drivers/gpu/drm/i915/i915_gem.c            |   7 +-
>  drivers/gpu/drm/i915/i915_gem_request.c    |   4 +-
>  drivers/gpu/drm/i915/i915_gem_request.h    |   2 +-
>  drivers/gpu/drm/i915/i915_guc_submission.c |  50 ++++----
>  drivers/gpu/drm/i915/i915_utils.h          |   9 ++
>  drivers/gpu/drm/i915/intel_engine_cs.c     |  12 ++
>  drivers/gpu/drm/i915/intel_lrc.c           | 184 +++++++++++++++++++----------
>  drivers/gpu/drm/i915/intel_ringbuffer.h    |   9 ++
>  9 files changed, 193 insertions(+), 95 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index f13b5b895a2b..553f016efe64 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3352,7 +3352,6 @@ static int i915_engine_info(struct seq_file *m, void *unused)
>
>  		if (i915.enable_execlists) {
>  			u32 ptr, read, write;
> -			struct rb_node *rb;
>  			unsigned int idx;
>
>  			seq_printf(m, "\tExeclist status: 0x%08x %08x\n",
> @@ -3396,9 +3395,13 @@ static int i915_engine_info(struct seq_file *m, void *unused)
>  			rcu_read_unlock();
>
>  			spin_lock_irq(&engine->timeline->lock);
> -			for (rb = engine->execlist_first; rb; rb = rb_next(rb)) {
> -				rq = rb_entry(rb, typeof(*rq), priotree.node);
> -				print_request(m, rq, "\t\tQ ");
> +			for (rb = engine->execlist_first; rb; rb = rb_next(rb)){
> +				struct i915_priolist *p =
> +					rb_entry(rb, typeof(*p), node);
> +
> +				list_for_each_entry(rq, &p->requests,
> +						    priotree.link)
> +					print_request(m, rq, "\t\tQ ");
>  			}
>  			spin_unlock_irq(&engine->timeline->lock);
>  		} else if (INTEL_GEN(dev_priv) > 6) {
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 83518cdb19f3..07db08cc6be0 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3155,8 +3155,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
>  	struct drm_i915_private *dev_priv =
>  		container_of(work, typeof(*dev_priv), gt.idle_work.work);
>  	struct drm_device *dev = &dev_priv->drm;
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
>  	bool rearm_hangcheck;
>
>  	if (!READ_ONCE(dev_priv->gt.awake))
> @@ -3194,10 +3192,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
>  	if (wait_for(intel_engines_are_idle(dev_priv), 10))
>  		DRM_ERROR("Timeout waiting for engines to idle\n");
>
> -	for_each_engine(engine, dev_priv, id) {
> -		intel_engine_disarm_breadcrumbs(engine);
> -		i915_gem_batch_pool_fini(&engine->batch_pool);
> -	}
> +	intel_engines_mark_idle(dev_priv);
>  	i915_gem_timelines_mark_idle(dev_priv);
>
>  	GEM_BUG_ON(!dev_priv->gt.awake);
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 10361c7e3b37..1ccf2522cdfd 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -159,7 +159,7 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt)
>  {
>  	struct i915_dependency *dep, *next;
>
> -	GEM_BUG_ON(!RB_EMPTY_NODE(&pt->node));
> +	GEM_BUG_ON(!list_empty(&pt->link));
>
>  	/* Everyone we depended upon (the fences we wait to be signaled)
>  	 * should retire before us and remove themselves from our list.
> @@ -185,7 +185,7 @@ i915_priotree_init(struct i915_priotree *pt)
>  {
>  	INIT_LIST_HEAD(&pt->signalers_list);
>  	INIT_LIST_HEAD(&pt->waiters_list);
> -	RB_CLEAR_NODE(&pt->node);
> +	INIT_LIST_HEAD(&pt->link);
>  	pt->priority = INT_MIN;
>  }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 4c8fb6efc0f1..7b7c84369d78 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -67,7 +67,7 @@ struct i915_dependency {
>  struct i915_priotree {
>  	struct list_head signalers_list; /* those before us, we depend upon */
>  	struct list_head waiters_list; /* those after us, they depend upon us */
> -	struct rb_node node;
> +	struct list_head link;
>  	int priority;
>  #define I915_PRIORITY_MAX 1024
>  #define I915_PRIORITY_NORMAL 0
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index f069c1faf9a9..440bab856cc2 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -674,32 +674,42 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
>
>  	spin_lock_irq(&engine->timeline->lock);
>  	rb = engine->execlist_first;
> +	GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
>  	while (rb) {
> -		struct drm_i915_gem_request *rq =
> -			rb_entry(rb, typeof(*rq), priotree.node);
> -
> -		if (last && rq->ctx != last->ctx) {
> -			if (port != engine->execlist_port)
> -				break;
> -
> -			port_assign(port, last);
> -			port++;
> +		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> +		struct drm_i915_gem_request *rq, *rn;
> +
> +		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
> +			if (last && rq->ctx != last->ctx) {
> +				if (port != engine->execlist_port) {
> +					__list_del_many(&p->requests,
> +							&rq->priotree.link);
> +					goto done;
> +				}
> +
> +				port_assign(port, last);
> +				port++;
> +			}
> +
> +			INIT_LIST_HEAD(&rq->priotree.link);
> +			rq->priotree.priority = INT_MAX;
> +
> +			i915_guc_submit(rq);
> +			trace_i915_gem_request_in(rq, port - engine->execlist_port);
> +			last = rq;
> +			submit = true;
>  		}
>
>  		rb = rb_next(rb);
> -		rb_erase(&rq->priotree.node, &engine->execlist_queue);
> -		RB_CLEAR_NODE(&rq->priotree.node);
> -		rq->priotree.priority = INT_MAX;
> -
> -		i915_guc_submit(rq);
> -		trace_i915_gem_request_in(rq, port - engine->execlist_port);
> -		last = rq;
> -		submit = true;
> +		rb_erase(&p->node, &engine->execlist_queue);
> +		INIT_LIST_HEAD(&p->requests);
> +		if (p->priority != I915_PRIORITY_NORMAL)
> +			kfree(p);
>  	}
> -	if (submit) {
> +done:
> +	engine->execlist_first = rb;
> +	if (submit)
>  		port_assign(port, last);
> -		engine->execlist_first = rb;
> -	}
>  	spin_unlock_irq(&engine->timeline->lock);
>
>  	return submit;
> diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
> index d9df23795f9a..16ecd1ab108d 100644
> --- a/drivers/gpu/drm/i915/i915_utils.h
> +++ b/drivers/gpu/drm/i915/i915_utils.h
> @@ -105,4 +105,13 @@
>  	__idx;								\
>  })
>
> +#include <linux/list.h>
> +
> +static inline void __list_del_many(struct list_head *head,
> +				   struct list_head *first)
> +{
> +	first->prev = head;
> +	WRITE_ONCE(head->next, first);
> +}
> +
>  #endif /* !__I915_UTILS_H */
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index c89f0e2030da..ea13fa2128cb 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -1272,6 +1272,18 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
>  		engine->set_default_submission(engine);
>  }
>
> +void intel_engines_mark_idle(struct drm_i915_private *i915)
> +{
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +
> +	for_each_engine(engine, i915, id) {
> +		intel_engine_disarm_breadcrumbs(engine);
> +		i915_gem_batch_pool_fini(&engine->batch_pool);
> +		engine->no_priolist = false;
> +	}
> +}
> +
>  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>  #include "selftests/mock_engine.c"
>  #endif
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index d7cb5517f596..4ce0929a03a9 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -436,57 +436,76 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>
>  	spin_lock_irq(&engine->timeline->lock);
>  	rb = engine->execlist_first;
> +	GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
>  	while (rb) {
> -		struct drm_i915_gem_request *cursor =
> -			rb_entry(rb, typeof(*cursor), priotree.node);
> -
> -		/* Can we combine this request with the current port? It has to
> -		 * be the same context/ringbuffer and not have any exceptions
> -		 * (e.g. GVT saying never to combine contexts).
> -		 *
> -		 * If we can combine the requests, we can execute both by
> -		 * updating the RING_TAIL to point to the end of the second
> -		 * request, and so we never need to tell the hardware about
> -		 * the first.
> -		 */
> -		if (last && !can_merge_ctx(cursor->ctx, last->ctx)) {
> -			/* If we are on the second port and cannot combine
> -			 * this request with the last, then we are done.
> -			 */
> -			if (port != engine->execlist_port)
> -				break;
> -
> -			/* If GVT overrides us we only ever submit port[0],
> -			 * leaving port[1] empty. Note that we also have
> -			 * to be careful that we don't queue the same
> -			 * context (even though a different request) to
> -			 * the second port.
> +		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> +		struct drm_i915_gem_request *rq, *rn;
> +
> +		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
> +			/*
> +			 * Can we combine this request with the current port?
> +			 * It has to be the same context/ringbuffer and not
> +			 * have any exceptions (e.g. GVT saying never to
> +			 * combine contexts).
> +			 *
> +			 * If we can combine the requests, we can execute both
> +			 * by updating the RING_TAIL to point to the end of the
> +			 * second request, and so we never need to tell the
> +			 * hardware about the first.
>  			 */
> -			if (ctx_single_port_submission(last->ctx) ||
> -			    ctx_single_port_submission(cursor->ctx))
> -				break;
> +			if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
> +				/*
> +				 * If we are on the second port and cannot
> +				 * combine this request with the last, then we
> +				 * are done.
> +				 */
> +				if (port != engine->execlist_port) {
> +					__list_del_many(&p->requests,
> +							&rq->priotree.link);
> +					goto done;
> +				}
> +
> +				/*
> +				 * If GVT overrides us we only ever submit
> +				 * port[0], leaving port[1] empty. Note that we
> +				 * also have to be careful that we don't queue
> +				 * the same context (even though a different
> +				 * request) to the second port.
> +				 */
> +				if (ctx_single_port_submission(last->ctx) ||
> +				    ctx_single_port_submission(rq->ctx)) {
> +					__list_del_many(&p->requests,
> +							&rq->priotree.link);
> +					goto done;
> +				}
> +
> +				GEM_BUG_ON(last->ctx == rq->ctx);
> +
> +				if (submit)
> +					port_assign(port, last);
> +				port++;
> +			}
>
> -			GEM_BUG_ON(last->ctx == cursor->ctx);
> +			INIT_LIST_HEAD(&rq->priotree.link);
> +			rq->priotree.priority = INT_MAX;
>
> -			if (submit)
> -				port_assign(port, last);
> -			port++;
> +			__i915_gem_request_submit(rq);
> +			trace_i915_gem_request_in(rq,
> +						  port - engine->execlist_port);
> +			last = rq;
> +			submit = true;
>  		}
>
>  		rb = rb_next(rb);
> -		rb_erase(&cursor->priotree.node, &engine->execlist_queue);
> -		RB_CLEAR_NODE(&cursor->priotree.node);
> -		cursor->priotree.priority = INT_MAX;
> -
> -		__i915_gem_request_submit(cursor);
> -		trace_i915_gem_request_in(cursor, port - engine->execlist_port);
> -		last = cursor;
> -		submit = true;
> +		rb_erase(&p->node, &engine->execlist_queue);
> +		INIT_LIST_HEAD(&p->requests);
> +		if (p->priority != I915_PRIORITY_NORMAL)
> +			kfree(p);
>  	}
> -	if (submit) {
> +done:
> +	engine->execlist_first = rb;
> +	if (submit)
>  		port_assign(port, last);
> -		engine->execlist_first = rb;
> -	}
>  	spin_unlock_irq(&engine->timeline->lock);
>
>  	if (submit)
> @@ -610,28 +629,66 @@ static void intel_lrc_irq_handler(unsigned long data)
>  	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
>  }
>
> -static bool insert_request(struct i915_priotree *pt, struct rb_root *root)
> +static bool
> +insert_request(struct intel_engine_cs *engine,
> +	       struct i915_priotree *pt,
> +	       int prio)
>  {
> -	struct rb_node **p, *rb;
> +	struct i915_priolist *p;
> +	struct rb_node **parent, *rb;
>  	bool first = true;
>
> +	if (unlikely(engine->no_priolist))
> +		prio = I915_PRIORITY_NORMAL;
> +
> +find_priolist:
>  	/* most positive priority is scheduled first, equal priorities fifo */
>  	rb = NULL;
> -	p = &root->rb_node;
> -	while (*p) {
> -		struct i915_priotree *pos;
> -
> -		rb = *p;
> -		pos = rb_entry(rb, typeof(*pos), node);
> -		if (pt->priority > pos->priority) {
> -			p = &rb->rb_left;
> -		} else {
> -			p = &rb->rb_right;
> +	parent = &engine->execlist_queue.rb_node;
> +	while (*parent) {
> +		rb = *parent;
> +		p = rb_entry(rb, typeof(*p), node);
> +		if (prio > p->priority) {
> +			parent = &rb->rb_left;
> +		} else if (prio < p->priority) {
> +			parent = &rb->rb_right;
>  			first = false;
> +		} else {
> +			list_add_tail(&pt->link, &p->requests);
> +			return false;
> +		}
> +	}
> +
> +	if (prio == I915_PRIORITY_NORMAL) {
> +		p = &engine->default_priolist;
> +	} else {
> +		p = kmalloc(sizeof(*p), GFP_ATOMIC);
> +		/* Convert an allocation failure to a priority bump */
> +		if (unlikely(!p)) {
> +			prio = I915_PRIORITY_NORMAL; /* recurses just once */
> +
> +			/* To maintain ordering with all rendering, after an
> +			 * allocation failure we have to disable all scheduling.
> +			 * Requests will then be executed in fifo, and schedule
> +			 * will ensure that dependencies are emitted in fifo.
> +			 * There will be still some reordering with existing
> +			 * request, so if userspace lied about their

requests plural

> +			 * dependdencies that reordering may be visible.

typo in dependencies

> +			 */
> +			engine->no_priolist = true;
> +			goto find_priolist;
>  		}
>  	}
> -	rb_link_node(&pt->node, rb, p);
> -	rb_insert_color(&pt->node, root);
> +
> +	p->priority = prio;
> +	rb_link_node(&p->node, rb, parent);
> +	rb_insert_color(&p->node, &engine->execlist_queue);
> +
> +	INIT_LIST_HEAD(&p->requests);
> +	list_add_tail(&pt->link, &p->requests);
> +
> +	if (first)
> +		engine->execlist_first = &p->node;
>
>  	return first;
>  }
> @@ -644,12 +701,16 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
>  	/* Will be called from irq-context when using foreign fences. */
>  	spin_lock_irqsave(&engine->timeline->lock, flags);
>
> -	if (insert_request(&request->priotree, &engine->execlist_queue)) {
> -		engine->execlist_first = &request->priotree.node;
> +	if (insert_request(engine,
> +			   &request->priotree,
> +			   request->priotree.priority)) {
>  		if (execlists_elsp_ready(engine))
>  			tasklet_hi_schedule(&engine->irq_tasklet);
>  	}
>
> +	GEM_BUG_ON(!engine->execlist_first);
> +	GEM_BUG_ON(list_empty(&request->priotree.link));
> +
>  	spin_unlock_irqrestore(&engine->timeline->lock, flags);
>  }
>
> @@ -734,10 +795,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
>  			continue;
>
>  		pt->priority = prio;
> -		if (!RB_EMPTY_NODE(&pt->node)) {
> -			rb_erase(&pt->node, &engine->execlist_queue);
> -			if (insert_request(pt, &engine->execlist_queue))
> -				engine->execlist_first = &pt->node;
> +		if (!list_empty(&pt->link)) {
> +			__list_del_entry(&pt->link);
> +			insert_request(engine, pt, prio);
>  		}
>  	}
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index c380f3ac8a39..fab2322664f4 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -177,6 +177,12 @@ enum intel_engine_id {
>  	VECS
>  };
>
> +struct i915_priolist {
> +	struct rb_node node;
> +	struct list_head requests;
> +	int priority;
> +};
> +
>  #define INTEL_ENGINE_CS_MAX_NAME 8
>
>  struct intel_engine_cs {
> @@ -367,6 +373,8 @@ struct intel_engine_cs {
>
>  	/* Execlists */
>  	struct tasklet_struct irq_tasklet;
> +	struct i915_priolist default_priolist;
> +	bool no_priolist;
>  	struct execlist_port {
>  		struct drm_i915_gem_request *request_count;
>  #define EXECLIST_COUNT_BITS 2
> @@ -722,6 +730,7 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
>  bool intel_engine_is_idle(struct intel_engine_cs *engine);
>  bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
>
> +void intel_engines_mark_idle(struct drm_i915_private *i915);
>  void intel_engines_reset_default_submission(struct drm_i915_private *i915);
>
>  #endif /* _INTEL_RINGBUFFER_H_ */
>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 09/12] drm/i915: Create a kmem_cache to allocate struct i915_priolist from
  2017-05-11 19:59 ` [PATCH 09/12] drm/i915: Create a kmem_cache to allocate struct i915_priolist from Chris Wilson
@ 2017-05-16  7:57   ` Tvrtko Ursulin
  0 siblings, 0 replies; 36+ messages in thread
From: Tvrtko Ursulin @ 2017-05-16  7:57 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 11/05/2017 20:59, Chris Wilson wrote:
> The i915_priolist are allocated within an atomic context on a path where
> we wish to minimise latency. If we use a dedicated kmem_cache, we have
> the advantage of a local freelist from which to service new requests
> that should keep the latency impact of an allocation small. Though
> currently we expect the majority of requests to be at default priority
> (and so hit the preallocated priolist), once userspace starts using
> priorities they are likely to use many fine grained policies improving
> the utilisation of a private slab.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h            | 1 +
>  drivers/gpu/drm/i915/i915_gem.c            | 9 ++++++++-
>  drivers/gpu/drm/i915/i915_guc_submission.c | 2 +-
>  drivers/gpu/drm/i915/intel_lrc.c           | 4 ++--
>  4 files changed, 12 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index ff3574a56812..7f192efbe088 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2027,6 +2027,7 @@ struct drm_i915_private {
>  	struct kmem_cache *vmas;
>  	struct kmem_cache *requests;
>  	struct kmem_cache *dependencies;
> +	struct kmem_cache *priorities;
>
>  	const struct intel_device_info info;
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 75d7575b81f4..f52d72b1a5b4 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4871,12 +4871,16 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
>  	if (!dev_priv->dependencies)
>  		goto err_requests;
>
> +	dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
> +	if (!dev_priv->priorities)
> +		goto err_dependencies;
> +
>  	mutex_lock(&dev_priv->drm.struct_mutex);
>  	INIT_LIST_HEAD(&dev_priv->gt.timelines);
>  	err = i915_gem_timeline_init__global(dev_priv);
>  	mutex_unlock(&dev_priv->drm.struct_mutex);
>  	if (err)
> -		goto err_dependencies;
> +		goto err_priorities;
>
>  	INIT_LIST_HEAD(&dev_priv->context_list);
>  	INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
> @@ -4900,6 +4904,8 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
>
>  	return 0;
>
> +err_priorities:
> +	kmem_cache_destroy(dev_priv->priorities);
>  err_dependencies:
>  	kmem_cache_destroy(dev_priv->dependencies);
>  err_requests:
> @@ -4923,6 +4929,7 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
>  	WARN_ON(!list_empty(&dev_priv->gt.timelines));
>  	mutex_unlock(&dev_priv->drm.struct_mutex);
>
> +	kmem_cache_destroy(dev_priv->priorities);
>  	kmem_cache_destroy(dev_priv->dependencies);
>  	kmem_cache_destroy(dev_priv->requests);
>  	kmem_cache_destroy(dev_priv->vmas);
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 440bab856cc2..53c022c47687 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -704,7 +704,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
>  		rb_erase(&p->node, &engine->execlist_queue);
>  		INIT_LIST_HEAD(&p->requests);
>  		if (p->priority != I915_PRIORITY_NORMAL)
> -			kfree(p);
> +			kmem_cache_free(engine->i915->priorities, p);
>  	}
>  done:
>  	engine->execlist_first = rb;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 849b35796de7..7e41529bd074 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -500,7 +500,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  		rb_erase(&p->node, &engine->execlist_queue);
>  		INIT_LIST_HEAD(&p->requests);
>  		if (p->priority != I915_PRIORITY_NORMAL)
> -			kfree(p);
> +			kmem_cache_free(engine->i915->priorities, p);
>  	}
>  done:
>  	engine->execlist_first = rb;
> @@ -659,7 +659,7 @@ insert_request(struct intel_engine_cs *engine,
>  	if (prio == I915_PRIORITY_NORMAL) {
>  		p = &engine->default_priolist;
>  	} else {
> -		p = kmalloc(sizeof(*p), GFP_ATOMIC);
> +		p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
>  		/* Convert an allocation failure to a priority bump */
>  		if (unlikely(!p)) {
>  			prio = I915_PRIORITY_NORMAL; /* recurses just once */
>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v2] drm/i915: Import the kfence selftests for i915_sw_fence
  2017-05-15 10:52   ` [PATCH v2] " Chris Wilson
@ 2017-05-16  8:55     ` Chris Wilson
  2017-05-16  9:15       ` Mika Kuoppala
  2017-05-16 14:49     ` Mika Kuoppala
  1 sibling, 1 reply; 36+ messages in thread
From: Chris Wilson @ 2017-05-16  8:55 UTC (permalink / raw)
  To: intel-gfx

On Mon, May 15, 2017 at 11:52:55AM +0100, Chris Wilson wrote:
> A long time ago, I wrote some selftests for the struct kfence idea. Now
> that we have infrastructure in i915/igt for running kselftests, include
> some for i915_sw_fence.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Anyone want to stamp these?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v2] drm/i915: Import the kfence selftests for i915_sw_fence
  2017-05-16  8:55     ` Chris Wilson
@ 2017-05-16  9:15       ` Mika Kuoppala
  0 siblings, 0 replies; 36+ messages in thread
From: Mika Kuoppala @ 2017-05-16  9:15 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> On Mon, May 15, 2017 at 11:52:55AM +0100, Chris Wilson wrote:
>> A long time ago, I wrote some selftests for the struct kfence idea. Now
>> that we have infrastructure in i915/igt for running kselftests, include
>> some for i915_sw_fence.
>> 
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>
> Anyone want to stamp these?

kref has a stamp and I will take a look at this one.
-Mika


> -Chris
>
> -- 
> Chris Wilson, Intel Open Source Technology Centre
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH v2] drm/i915: Import the kfence selftests for i915_sw_fence
  2017-05-15 10:52   ` [PATCH v2] " Chris Wilson
  2017-05-16  8:55     ` Chris Wilson
@ 2017-05-16 14:49     ` Mika Kuoppala
  1 sibling, 0 replies; 36+ messages in thread
From: Mika Kuoppala @ 2017-05-16 14:49 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> A long time ago, I wrote some selftests for the struct kfence idea. Now
> that we have infrastructure in i915/igt for running kselftests, include
> some for i915_sw_fence.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

The only minor thing that caught my eye was that I would
have preferred:

err_free_C: type of gotos instead of just err_C. Here,
though, everything was a straightforward teardown of allocations,
so it was clear.

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> ---
>  drivers/gpu/drm/i915/Kconfig.debug                 |  12 +
>  drivers/gpu/drm/i915/i915_sw_fence.c               |   7 +-
>  .../gpu/drm/i915/selftests/i915_mock_selftests.h   |   1 +
>  drivers/gpu/drm/i915/selftests/i915_sw_fence.c     | 576 +++++++++++++++++++++
>  4 files changed, 595 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/i915/selftests/i915_sw_fence.c
>
> diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
> index b00edd3b8800..78c5c049a347 100644
> --- a/drivers/gpu/drm/i915/Kconfig.debug
> +++ b/drivers/gpu/drm/i915/Kconfig.debug
> @@ -61,6 +61,18 @@ config DRM_I915_SW_FENCE_DEBUG_OBJECTS
>  
>            If in doubt, say "N".
>  
> +config DRM_I915_SW_FENCE_CHECK_DAG
> +        bool "Enable additional driver debugging for detecting dependency cycles"
> +        depends on DRM_I915
> +        default n
> +        help
> +          Choose this option to turn on extra driver debugging that may affect
> +          performance but will catch some internal issues.
> +
> +          Recommended for driver developers only.
> +
> +          If in doubt, say "N".
> +
>  config DRM_I915_SELFTEST
>  	bool "Enable selftests upon driver load"
>  	depends on DRM_I915
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
> index a0a690d6627e..474d23c0c0ce 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence.c
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.c
> @@ -12,6 +12,7 @@
>  #include <linux/reservation.h>
>  
>  #include "i915_sw_fence.h"
> +#include "i915_selftest.h"
>  
>  #define I915_SW_FENCE_FLAG_ALLOC BIT(3) /* after WQ_FLAG_* for safety */
>  
> @@ -274,7 +275,7 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
>  	unsigned long flags;
>  	bool err;
>  
> -	if (!IS_ENABLED(CONFIG_I915_SW_FENCE_CHECK_DAG))
> +	if (!IS_ENABLED(CONFIG_DRM_I915_SW_FENCE_CHECK_DAG))
>  		return false;
>  
>  	spin_lock_irqsave(&i915_sw_fence_lock, flags);
> @@ -490,3 +491,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
>  
>  	return ret;
>  }
> +
> +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> +#include "selftests/i915_sw_fence.c"
> +#endif
> diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> index 76c1f149a0a0..fc74687501ba 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> @@ -9,6 +9,7 @@
>   * Tests are executed in order by igt/drv_selftest
>   */
>  selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */
> +selftest(fence, i915_sw_fence_mock_selftests)
>  selftest(scatterlist, scatterlist_mock_selftests)
>  selftest(syncmap, i915_syncmap_mock_selftests)
>  selftest(uncore, intel_uncore_mock_selftests)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
> new file mode 100644
> index 000000000000..c552c23eecff
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
> @@ -0,0 +1,576 @@
> +/*
> + * Copyright © 2017 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include <linux/completion.h>
> +#include <linux/delay.h>
> +
> +#include "../i915_selftest.h"
> +
> +static int __i915_sw_fence_call
> +fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
> +{
> +	switch (state) {
> +	case FENCE_COMPLETE:
> +		break;
> +
> +	case FENCE_FREE:
> +		/* Leave the fence for the caller to free it after testing */
> +		break;
> +	}
> +
> +	return NOTIFY_DONE;
> +}
> +
> +static struct i915_sw_fence *alloc_fence(void)
> +{
> +	struct i915_sw_fence *fence;
> +
> +	fence = kmalloc(sizeof(*fence), GFP_KERNEL);
> +	if (!fence)
> +		return NULL;
> +
> +	i915_sw_fence_init(fence, fence_notify);
> +	return fence;
> +}
> +
> +static void free_fence(struct i915_sw_fence *fence)
> +{
> +	i915_sw_fence_fini(fence);
> +	kfree(fence);
> +}
> +
> +static int __test_self(struct i915_sw_fence *fence)
> +{
> +	if (i915_sw_fence_done(fence))
> +		return -EINVAL;
> +
> +	i915_sw_fence_commit(fence);
> +	if (!i915_sw_fence_done(fence))
> +		return -EINVAL;
> +
> +	i915_sw_fence_wait(fence);
> +	if (!i915_sw_fence_done(fence))
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +static int test_self(void *arg)
> +{
> +	struct i915_sw_fence *fence;
> +	int ret;
> +
> +	/* Test i915_sw_fence signaling and completion testing */
> +	fence = alloc_fence();
> +	if (!fence)
> +		return -ENOMEM;
> +
> +	ret = __test_self(fence);
> +
> +	free_fence(fence);
> +	return ret;
> +}
> +
> +static int test_dag(void *arg)
> +{
> +	struct i915_sw_fence *A, *B, *C;
> +	int ret = -EINVAL;
> +
> +	/* Test detection of cycles within the i915_sw_fence graphs */
> +	if (!IS_ENABLED(CONFIG_DRM_I915_SW_FENCE_CHECK_DAG))
> +		return 0;
> +
> +	A = alloc_fence();
> +	if (!A)
> +		return -ENOMEM;
> +
> +	if (i915_sw_fence_await_sw_fence_gfp(A, A, GFP_KERNEL) != -EINVAL) {
> +		pr_err("recursive cycle not detected (AA)\n");
> +		goto err_A;
> +	}
> +
> +	B = alloc_fence();
> +	if (!B) {
> +		ret = -ENOMEM;
> +		goto err_A;
> +	}
> +
> +	i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL);
> +	if (i915_sw_fence_await_sw_fence_gfp(B, A, GFP_KERNEL) != -EINVAL) {
> +		pr_err("single depth cycle not detected (BAB)\n");
> +		goto err_B;
> +	}
> +
> +	C = alloc_fence();
> +	if (i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL) == -EINVAL) {
> +		pr_err("invalid cycle detected\n");
> +		goto err_C;
> +	}
> +	if (i915_sw_fence_await_sw_fence_gfp(C, B, GFP_KERNEL) != -EINVAL) {
> +		pr_err("single depth cycle not detected (CBC)\n");
> +		goto err_C;
> +	}
> +	if (i915_sw_fence_await_sw_fence_gfp(C, A, GFP_KERNEL) != -EINVAL) {
> +		pr_err("cycle not detected (BA, CB, AC)\n");
> +		goto err_C;
> +	}
> +	if (i915_sw_fence_await_sw_fence_gfp(A, C, GFP_KERNEL) == -EINVAL) {
> +		pr_err("invalid cycle detected\n");
> +		goto err_C;
> +	}
> +
> +	i915_sw_fence_commit(A);
> +	i915_sw_fence_commit(B);
> +	i915_sw_fence_commit(C);
> +
> +	ret = 0;
> +	if (!i915_sw_fence_done(C)) {
> +		pr_err("fence C not done\n");
> +		ret = -EINVAL;
> +	}
> +	if (!i915_sw_fence_done(B)) {
> +		pr_err("fence B not done\n");
> +		ret = -EINVAL;
> +	}
> +	if (!i915_sw_fence_done(A)) {
> +		pr_err("fence A not done\n");
> +		ret = -EINVAL;
> +	}
> +err_C:
> +	free_fence(C);
> +err_B:
> +	free_fence(B);
> +err_A:
> +	free_fence(A);
> +	return ret;
> +}
> +
> +static int test_AB(void *arg)
> +{
> +	struct i915_sw_fence *A, *B;
> +	int ret;
> +
> +	/* Test i915_sw_fence (A) waiting on an event source (B) */
> +	A = alloc_fence();
> +	if (!A)
> +		return -ENOMEM;
> +	B = alloc_fence();
> +	if (!B) {
> +		ret = -ENOMEM;
> +		goto err_A;
> +	}
> +
> +	ret = i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL);
> +	if (ret < 0)
> +		goto err_B;
> +	if (ret == 0) {
> +		pr_err("Incorrectly reported fence A was complete before await\n");
> +		ret = -EINVAL;
> +		goto err_B;
> +	}
> +
> +	ret = -EINVAL;
> +	i915_sw_fence_commit(A);
> +	if (i915_sw_fence_done(A))
> +		goto err_B;
> +
> +	i915_sw_fence_commit(B);
> +	if (!i915_sw_fence_done(B)) {
> +		pr_err("Fence B is not done\n");
> +		goto err_B;
> +	}
> +
> +	if (!i915_sw_fence_done(A)) {
> +		pr_err("Fence A is not done\n");
> +		goto err_B;
> +	}
> +
> +	ret = 0;
> +err_B:
> +	free_fence(B);
> +err_A:
> +	free_fence(A);
> +	return ret;
> +}
> +
> +static int test_ABC(void *arg)
> +{
> +	struct i915_sw_fence *A, *B, *C;
> +	int ret;
> +
> +	/* Test a chain of fences, A waits on B who waits on C */
> +	A = alloc_fence();
> +	if (!A)
> +		return -ENOMEM;
> +
> +	B = alloc_fence();
> +	if (!B) {
> +		ret = -ENOMEM;
> +		goto err_A;
> +	}
> +
> +	C = alloc_fence();
> +	if (!C) {
> +		ret = -ENOMEM;
> +		goto err_B;
> +	}
> +
> +	ret = i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL);
> +	if (ret < 0)
> +		goto err_C;
> +	if (ret == 0) {
> +		pr_err("Incorrectly reported fence B was complete before await\n");
> +		goto err_C;
> +	}
> +
> +	ret = i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL);
> +	if (ret < 0)
> +		goto err_C;
> +	if (ret == 0) {
> +		pr_err("Incorrectly reported fence C was complete before await\n");
> +		goto err_C;
> +	}
> +
> +	ret = -EINVAL;
> +	i915_sw_fence_commit(A);
> +	if (i915_sw_fence_done(A)) {
> +		pr_err("Fence A completed early\n");
> +		goto err_C;
> +	}
> +
> +	i915_sw_fence_commit(B);
> +	if (i915_sw_fence_done(B)) {
> +		pr_err("Fence B completed early\n");
> +		goto err_C;
> +	}
> +
> +	if (i915_sw_fence_done(A)) {
> +		pr_err("Fence A completed early (after signaling B)\n");
> +		goto err_C;
> +	}
> +
> +	i915_sw_fence_commit(C);
> +
> +	ret = 0;
> +	if (!i915_sw_fence_done(C)) {
> +		pr_err("Fence C not done\n");
> +		ret = -EINVAL;
> +	}
> +	if (!i915_sw_fence_done(B)) {
> +		pr_err("Fence B not done\n");
> +		ret = -EINVAL;
> +	}
> +	if (!i915_sw_fence_done(A)) {
> +		pr_err("Fence A not done\n");
> +		ret = -EINVAL;
> +	}
> +err_C:
> +	free_fence(C);
> +err_B:
> +	free_fence(B);
> +err_A:
> +	free_fence(A);
> +	return ret;
> +}
> +
> +static int test_AB_C(void *arg)
> +{
> +	struct i915_sw_fence *A, *B, *C;
> +	int ret = -EINVAL;
> +
> +	/* Test multiple fences (AB) waiting on a single event (C) */
> +	A = alloc_fence();
> +	if (!A)
> +		return -ENOMEM;
> +
> +	B = alloc_fence();
> +	if (!B) {
> +		ret = -ENOMEM;
> +		goto err_A;
> +	}
> +
> +	C = alloc_fence();
> +	if (!B) {
> +		ret = -ENOMEM;
> +		goto err_B;
> +	}
> +
> +	ret = i915_sw_fence_await_sw_fence_gfp(A, C, GFP_KERNEL);
> +	if (ret < 0)
> +		goto err_C;
> +	if (ret == 0) {
> +		ret = -EINVAL;
> +		goto err_C;
> +	}
> +
> +	ret = i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL);
> +	if (ret < 0)
> +		goto err_C;
> +	if (ret == 0) {
> +		ret = -EINVAL;
> +		goto err_C;
> +	}
> +
> +	i915_sw_fence_commit(A);
> +	i915_sw_fence_commit(B);
> +
> +	ret = 0;
> +	if (i915_sw_fence_done(A)) {
> +		pr_err("Fence A completed early\n");
> +		ret = -EINVAL;
> +	}
> +
> +	if (i915_sw_fence_done(B)) {
> +		pr_err("Fence B completed early\n");
> +		ret = -EINVAL;
> +	}
> +
> +	i915_sw_fence_commit(C);
> +	if (!i915_sw_fence_done(C)) {
> +		pr_err("Fence C not done\n");
> +		ret = -EINVAL;
> +	}
> +
> +	if (!i915_sw_fence_done(B)) {
> +		pr_err("Fence B not done\n");
> +		ret = -EINVAL;
> +	}
> +
> +	if (!i915_sw_fence_done(A)) {
> +		pr_err("Fence A not done\n");
> +		ret = -EINVAL;
> +	}
> +
> +err_C:
> +	free_fence(C);
> +err_B:
> +	free_fence(B);
> +err_A:
> +	free_fence(A);
> +	return ret;
> +}
> +
> +static int test_C_AB(void *arg)
> +{
> +	struct i915_sw_fence *A, *B, *C;
> +	int ret;
> +
> +	/* Test multiple event sources (A,B) for a single fence (C) */
> +	A = alloc_fence();
> +	if (!A)
> +		return -ENOMEM;
> +
> +	B = alloc_fence();
> +	if (!B) {
> +		ret = -ENOMEM;
> +		goto err_A;
> +	}
> +
> +	C = alloc_fence();
> +	if (!B) {
> +		ret = -ENOMEM;
> +		goto err_B;
> +	}
> +
> +	ret = i915_sw_fence_await_sw_fence_gfp(C, A, GFP_KERNEL);
> +	if (ret < 0)
> +		goto err_C;
> +	if (ret == 0) {
> +		ret = -EINVAL;
> +		goto err_C;
> +	}
> +
> +	ret = i915_sw_fence_await_sw_fence_gfp(C, B, GFP_KERNEL);
> +	if (ret < 0)
> +		goto err_C;
> +	if (ret == 0) {
> +		ret = -EINVAL;
> +		goto err_C;
> +	}
> +
> +	ret = 0;
> +	i915_sw_fence_commit(C);
> +	if (i915_sw_fence_done(C))
> +		ret = -EINVAL;
> +
> +	i915_sw_fence_commit(A);
> +	i915_sw_fence_commit(B);
> +
> +	if (!i915_sw_fence_done(A)) {
> +		pr_err("Fence A not done\n");
> +		ret = -EINVAL;
> +	}
> +
> +	if (!i915_sw_fence_done(B)) {
> +		pr_err("Fence B not done\n");
> +		ret = -EINVAL;
> +	}
> +
> +	if (!i915_sw_fence_done(C)) {
> +		pr_err("Fence C not done\n");
> +		ret = -EINVAL;
> +	}
> +
> +err_C:
> +	free_fence(C);
> +err_B:
> +	free_fence(B);
> +err_A:
> +	free_fence(A);
> +	return ret;
> +}
> +
> +static int test_chain(void *arg)
> +{
> +	int nfences = 4096;
> +	struct i915_sw_fence **fences;
> +	int ret, i;
> +
> +	/* Test a long chain of fences */
> +	fences = kmalloc_array(nfences, sizeof(*fences), GFP_KERNEL);
> +	if (!fences)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < nfences; i++) {
> +		fences[i] = alloc_fence();
> +		if (!fences[i]) {
> +			nfences = i;
> +			ret = -ENOMEM;
> +			goto err;
> +		}
> +
> +		if (i > 0) {
> +			ret = i915_sw_fence_await_sw_fence_gfp(fences[i],
> +							       fences[i - 1],
> +							       GFP_KERNEL);
> +			if (ret < 0) {
> +				nfences = i + 1;
> +				goto err;
> +			}
> +
> +			i915_sw_fence_commit(fences[i]);
> +		}
> +	}
> +
> +	ret = 0;
> +	for (i = nfences; --i; ) {
> +		if (i915_sw_fence_done(fences[i])) {
> +			if (ret == 0)
> +				pr_err("Fence[%d] completed early\n", i);
> +			ret = -EINVAL;
> +		}
> +	}
> +	i915_sw_fence_commit(fences[0]);
> +	for (i = 0; ret == 0 && i < nfences; i++) {
> +		if (!i915_sw_fence_done(fences[i])) {
> +			pr_err("Fence[%d] is not done\n", i);
> +			ret = -EINVAL;
> +		}
> +	}
> +
> +err:
> +	for (i = 0; i < nfences; i++)
> +		free_fence(fences[i]);
> +	kfree(fences);
> +	return ret;
> +}
> +
> +struct task_ipc {
> +	struct work_struct work;
> +	struct completion started;
> +	struct i915_sw_fence *in, *out;
> +	int value;
> +};
> +
> +static void task_ipc(struct work_struct *work)
> +{
> +	struct task_ipc *ipc = container_of(work, typeof(*ipc), work);
> +
> +	complete(&ipc->started);
> +
> +	i915_sw_fence_wait(ipc->in);
> +	smp_store_mb(ipc->value, 1);
> +	i915_sw_fence_commit(ipc->out);
> +}
> +
> +static int test_ipc(void *arg)
> +{
> +	struct task_ipc ipc;
> +	int ret = 0;
> +
> +	/* Test use of i915_sw_fence as an interprocess signaling mechanism */
> +	ipc.in = alloc_fence();
> +	if (!ipc.in)
> +		return -ENOMEM;
> +	ipc.out = alloc_fence();
> +	if (!ipc.out) {
> +		ret = -ENOMEM;
> +		goto err_in;
> +	}
> +
> +	/* use a completion to avoid chicken-and-egg testing */
> +	init_completion(&ipc.started);
> +
> +	ipc.value = 0;
> +	INIT_WORK(&ipc.work, task_ipc);
> +	schedule_work(&ipc.work);
> +
> +	wait_for_completion(&ipc.started);
> +
> +	usleep_range(1000, 2000);
> +	if (READ_ONCE(ipc.value)) {
> +		pr_err("worker updated value before i915_sw_fence was signaled\n");
> +		ret = -EINVAL;
> +	}
> +
> +	i915_sw_fence_commit(ipc.in);
> +	i915_sw_fence_wait(ipc.out);
> +
> +	if (!READ_ONCE(ipc.value)) {
> +		pr_err("worker signaled i915_sw_fence before value was posted\n");
> +		ret = -EINVAL;
> +	}
> +
> +	flush_work(&ipc.work);
> +	free_fence(ipc.out);
> +err_in:
> +	free_fence(ipc.in);
> +	return ret;
> +}
> +
> +int i915_sw_fence_mock_selftests(void)
> +{
> +	static const struct i915_subtest tests[] = {
> +		SUBTEST(test_self),
> +		SUBTEST(test_dag),
> +		SUBTEST(test_AB),
> +		SUBTEST(test_ABC),
> +		SUBTEST(test_AB_C),
> +		SUBTEST(test_C_AB),
> +		SUBTEST(test_chain),
> +		SUBTEST(test_ipc),
> +	};
> +
> +	return i915_subtests(tests, NULL);
> +}
> -- 
> 2.11.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2017-05-17 11:17 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-05-11 19:59 [PATCH 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
2017-05-11 19:59 ` [PATCH 02/12] drm/i915: Import the kfence selftests for i915_sw_fence Chris Wilson
2017-05-15 10:52   ` [PATCH v2] " Chris Wilson
2017-05-16  8:55     ` Chris Wilson
2017-05-16  9:15       ` Mika Kuoppala
2017-05-16 14:49     ` Mika Kuoppala
2017-05-11 19:59 ` [PATCH 03/12] drm/i915: Make ptr_unpack_bits() more function-like Chris Wilson
2017-05-11 19:59 ` [PATCH 04/12] drm/i915: Redefine ptr_pack_bits() and friends Chris Wilson
2017-05-11 19:59 ` [PATCH 05/12] drm/i915/execlists: Pack the count into the low bits of the port.request Chris Wilson
2017-05-11 19:59 ` [PATCH 06/12] drm/i915: Don't mark an execlists context-switch when idle Chris Wilson
2017-05-11 19:59 ` [PATCH 07/12] drm/i915: Use a define for the default priority [0] Chris Wilson
2017-05-12  8:02   ` Mika Kuoppala
2017-05-11 19:59 ` [PATCH 08/12] drm/i915: Split execlist priority queue into rbtree + linked list Chris Wilson
2017-05-15 11:45   ` Tvrtko Ursulin
2017-05-15 12:26     ` Chris Wilson
2017-05-15 12:56   ` [PATCH v2] " Chris Wilson
2017-05-15 13:27     ` Chris Wilson
2017-05-15 13:29     ` [PATCH v3] " Chris Wilson
2017-05-15 14:51       ` Michał Winiarski
2017-05-16  7:55       ` Tvrtko Ursulin
2017-05-11 19:59 ` [PATCH 09/12] drm/i915: Create a kmem_cache to allocate struct i915_priolist from Chris Wilson
2017-05-16  7:57   ` Tvrtko Ursulin
2017-05-11 19:59 ` [PATCH 10/12] drm/i915/execlists: Reduce lock contention between schedule/submit_request Chris Wilson
2017-05-15 10:51   ` Tvrtko Ursulin
2017-05-15 10:55     ` Chris Wilson
2017-05-15 11:46       ` Tvrtko Ursulin
2017-05-11 19:59 ` [PATCH 11/12] drm/i915: Stop inlining the execlists IRQ handler Chris Wilson
2017-05-11 19:59 ` [PATCH 12/12] drm/i915: Don't force serialisation on marking up execlists irq posted Chris Wilson
2017-05-12  7:16   ` Mika Kuoppala
2017-05-15 10:33   ` Tvrtko Ursulin
2017-05-11 20:17 ` ✓ Fi.CI.BAT: success for series starting with [01/12] drm/i915: Remove kref from i915_sw_fence Patchwork
2017-05-12  7:43 ` [PATCH 01/12] " Mika Kuoppala
2017-05-12  8:19   ` Chris Wilson
2017-05-12 12:13 ` Mika Kuoppala
2017-05-15 11:47 ` ✓ Fi.CI.BAT: success for series starting with [01/12] drm/i915: Remove kref from i915_sw_fence (rev2) Patchwork
2017-05-15 14:01 ` ✓ Fi.CI.BAT: success for series starting with [01/12] drm/i915: Remove kref from i915_sw_fence (rev4) Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.