* [PATCH v3 02/13] drm/i915/execlists: Move request unwinding to a separate function
2017-09-28 19:38 [PATCH v3 01/13] drm/i915: Inherit Kabylake platform features from Skylake Chris Wilson
@ 2017-09-28 19:38 ` Chris Wilson
2017-09-28 19:39 ` [PATCH v3 03/13] drm/i915: Give the invalid priority a magic name Chris Wilson
` (15 subsequent siblings)
16 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2017-09-28 19:38 UTC (permalink / raw)
To: intel-gfx
In the future, we will want to unwind requests following a preemption
point. This requires the same steps as for unwinding upon a reset, so
extract the existing code to a separate function for later use.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
drivers/gpu/drm/i915/intel_lrc.c | 54 +++++++++++++++++++++++++---------------
1 file changed, 34 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 61cac26a8b05..cbac2fff8e4d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -210,6 +210,7 @@
#define EXECLISTS_REQUEST_SIZE 64 /* bytes */
#define WA_TAIL_DWORDS 2
+#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
struct intel_engine_cs *engine);
@@ -348,6 +349,37 @@ lookup_priolist(struct intel_engine_cs *engine,
return ptr_pack_bits(p, first, 1);
}
+static void unwind_wa_tail(struct drm_i915_gem_request *rq)
+{
+ rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
+ assert_ring_tail_valid(rq->ring, rq->tail);
+}
+
+static void unwind_incomplete_requests(struct intel_engine_cs *engine)
+{
+ struct drm_i915_gem_request *rq, *rn;
+
+ lockdep_assert_held(&engine->timeline->lock);
+
+ list_for_each_entry_safe_reverse(rq, rn,
+ &engine->timeline->requests,
+ link) {
+ struct i915_priolist *p;
+
+ if (i915_gem_request_completed(rq))
+ return;
+
+ __i915_gem_request_unsubmit(rq);
+ unwind_wa_tail(rq);
+
+ p = lookup_priolist(engine,
+ &rq->priotree,
+ rq->priotree.priority);
+ list_add(&rq->priotree.link,
+ &ptr_mask_bits(p, 1)->requests);
+ }
+}
+
static inline void
execlists_context_status_change(struct drm_i915_gem_request *rq,
unsigned long status)
@@ -1382,7 +1414,6 @@ static void reset_common_ring(struct intel_engine_cs *engine,
struct drm_i915_gem_request *request)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
- struct drm_i915_gem_request *rq, *rn;
struct intel_context *ce;
unsigned long flags;
@@ -1400,21 +1431,7 @@ static void reset_common_ring(struct intel_engine_cs *engine,
execlist_cancel_port_requests(execlists);
/* Push back any incomplete requests for replay after the reset. */
- list_for_each_entry_safe_reverse(rq, rn,
- &engine->timeline->requests, link) {
- struct i915_priolist *p;
-
- if (i915_gem_request_completed(rq))
- break;
-
- __i915_gem_request_unsubmit(rq);
-
- p = lookup_priolist(engine,
- &rq->priotree,
- rq->priotree.priority);
- list_add(&rq->priotree.link,
- &ptr_mask_bits(p, 1)->requests);
- }
+ unwind_incomplete_requests(engine);
spin_unlock_irqrestore(&engine->timeline->lock, flags);
@@ -1451,10 +1468,7 @@ static void reset_common_ring(struct intel_engine_cs *engine,
intel_ring_update_space(request->ring);
/* Reset WaIdleLiteRestore:bdw,skl as well */
- request->tail =
- intel_ring_wrap(request->ring,
- request->wa_tail - WA_TAIL_DWORDS*sizeof(u32));
- assert_ring_tail_valid(request->ring, request->tail);
+ unwind_wa_tail(request);
}
static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
--
2.14.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related	[flat|nested] 30+ messages in thread
* [PATCH v3 03/13] drm/i915: Give the invalid priority a magic name
2017-09-28 19:38 [PATCH v3 01/13] drm/i915: Inherit Kabylake platform features from Skylake Chris Wilson
2017-09-28 19:38 ` [PATCH v3 02/13] drm/i915/execlists: Move request unwinding to a separate function Chris Wilson
@ 2017-09-28 19:39 ` Chris Wilson
2017-09-29 6:39 ` Mika Kuoppala
2017-09-29 7:01 ` Joonas Lahtinen
2017-09-28 19:39 ` [PATCH v3 04/13] drm/i915/execlists: Cache the last priolist lookup Chris Wilson
` (14 subsequent siblings)
16 siblings, 2 replies; 30+ messages in thread
From: Chris Wilson @ 2017-09-28 19:39 UTC (permalink / raw)
To: intel-gfx
We use INT_MIN to denote the priority of a request that has not been
submitted to the scheduler; we treat INT_MIN as an invalid priority and
initialise the request to it. Give the value a name so it stands out.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
drivers/gpu/drm/i915/i915_gem_request.c | 2 +-
drivers/gpu/drm/i915/i915_gem_request.h | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 4 +++-
3 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 4eb1a76731b2..14956d899911 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -186,7 +186,7 @@ i915_priotree_init(struct i915_priotree *pt)
INIT_LIST_HEAD(&pt->signalers_list);
INIT_LIST_HEAD(&pt->waiters_list);
INIT_LIST_HEAD(&pt->link);
- pt->priority = INT_MIN;
+ pt->priority = I915_PRIORITY_INVALID;
}
static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 96eb52471dad..6b9e992d01de 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -72,6 +72,7 @@ struct i915_priotree {
#define I915_PRIORITY_MAX 1024
#define I915_PRIORITY_NORMAL 0
#define I915_PRIORITY_MIN (-I915_PRIORITY_MAX)
+#define I915_PRIORITY_INVALID INT_MIN
};
struct i915_gem_capture_list {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index cbac2fff8e4d..303bb2c0b3ce 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -863,6 +863,8 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
struct i915_dependency stack;
LIST_HEAD(dfs);
+ GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
+
if (prio <= READ_ONCE(request->priotree.priority))
return;
@@ -911,7 +913,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
* execlists_submit_request()), we can set our own priority and skip
* acquiring the engine locks.
*/
- if (request->priotree.priority == INT_MIN) {
+ if (request->priotree.priority == I915_PRIORITY_INVALID) {
GEM_BUG_ON(!list_empty(&request->priotree.link));
request->priotree.priority = prio;
if (stack.dfs_link.next == stack.dfs_link.prev)
--
2.14.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related	[flat|nested] 30+ messages in thread
* Re: [PATCH v3 03/13] drm/i915: Give the invalid priority a magic name
2017-09-28 19:39 ` [PATCH v3 03/13] drm/i915: Give the invalid priority a magic name Chris Wilson
@ 2017-09-29 6:39 ` Mika Kuoppala
2017-09-29 7:01 ` Joonas Lahtinen
1 sibling, 0 replies; 30+ messages in thread
From: Mika Kuoppala @ 2017-09-29 6:39 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
Chris Wilson <chris@chris-wilson.co.uk> writes:
> We use INT_MIN to denote the priority of a request that has not been
> submitted to the scheduler; we treat INT_MIN as an invalid priority and
> initialise the request to it. Give the value a name so it stands out.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem_request.c | 2 +-
> drivers/gpu/drm/i915/i915_gem_request.h | 1 +
> drivers/gpu/drm/i915/intel_lrc.c | 4 +++-
> 3 files changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 4eb1a76731b2..14956d899911 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -186,7 +186,7 @@ i915_priotree_init(struct i915_priotree *pt)
> INIT_LIST_HEAD(&pt->signalers_list);
> INIT_LIST_HEAD(&pt->waiters_list);
> INIT_LIST_HEAD(&pt->link);
> - pt->priority = INT_MIN;
> + pt->priority = I915_PRIORITY_INVALID;
> }
>
> static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 96eb52471dad..6b9e992d01de 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -72,6 +72,7 @@ struct i915_priotree {
> #define I915_PRIORITY_MAX 1024
> #define I915_PRIORITY_NORMAL 0
> #define I915_PRIORITY_MIN (-I915_PRIORITY_MAX)
> +#define I915_PRIORITY_INVALID INT_MIN
> };
>
> struct i915_gem_capture_list {
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index cbac2fff8e4d..303bb2c0b3ce 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -863,6 +863,8 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
> struct i915_dependency stack;
> LIST_HEAD(dfs);
>
> + GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
> +
> if (prio <= READ_ONCE(request->priotree.priority))
> return;
>
> @@ -911,7 +913,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
> * execlists_submit_request()), we can set our own priority and skip
> * acquiring the engine locks.
> */
> - if (request->priotree.priority == INT_MIN) {
> + if (request->priotree.priority == I915_PRIORITY_INVALID) {
> GEM_BUG_ON(!list_empty(&request->priotree.link));
> request->priotree.priority = prio;
> if (stack.dfs_link.next == stack.dfs_link.prev)
> --
> 2.14.2
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply	[flat|nested] 30+ messages in thread
* Re: [PATCH v3 03/13] drm/i915: Give the invalid priority a magic name
2017-09-28 19:39 ` [PATCH v3 03/13] drm/i915: Give the invalid priority a magic name Chris Wilson
2017-09-29 6:39 ` Mika Kuoppala
@ 2017-09-29 7:01 ` Joonas Lahtinen
1 sibling, 0 replies; 30+ messages in thread
From: Joonas Lahtinen @ 2017-09-29 7:01 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
On Thu, 2017-09-28 at 20:39 +0100, Chris Wilson wrote:
> We use INT_MIN to denote the priority of a request that has not been
> submitted to the scheduler; we treat INT_MIN as an invalid priority and
> initialise the request to it. Give the value a name so it stands out.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Regards, Joonas
--
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH v3 04/13] drm/i915/execlists: Cache the last priolist lookup
2017-09-28 19:38 [PATCH v3 01/13] drm/i915: Inherit Kabylake platform features from Skylake Chris Wilson
2017-09-28 19:38 ` [PATCH v3 02/13] drm/i915/execlists: Move request unwinding to a separate function Chris Wilson
2017-09-28 19:39 ` [PATCH v3 03/13] drm/i915: Give the invalid priority a magic name Chris Wilson
@ 2017-09-28 19:39 ` Chris Wilson
2017-09-29 11:45 ` Chris Wilson
2017-09-28 19:39 ` [PATCH v3 05/13] drm/i915/preempt: Fix WaEnablePreemptionGranularityControlByUMD Chris Wilson
` (13 subsequent siblings)
16 siblings, 1 reply; 30+ messages in thread
From: Chris Wilson @ 2017-09-28 19:39 UTC (permalink / raw)
To: intel-gfx
From: Michał Winiarski <michal.winiarski@intel.com>
Avoid the repeated rbtree lookup for each request as we unwind them by
tracking the last priolist.
v2: Fix up my unhelpful suggestion of using default_priolist.
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
drivers/gpu/drm/i915/intel_lrc.c | 20 +++++++++++++-------
1 file changed, 13 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 303bb2c0b3ce..7d6da130b184 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -358,25 +358,31 @@ static void unwind_wa_tail(struct drm_i915_gem_request *rq)
static void unwind_incomplete_requests(struct intel_engine_cs *engine)
{
struct drm_i915_gem_request *rq, *rn;
+ struct i915_priolist *uninitialized_var(p);
+ int last_prio = I915_PRIORITY_INVALID;
lockdep_assert_held(&engine->timeline->lock);
list_for_each_entry_safe_reverse(rq, rn,
&engine->timeline->requests,
link) {
- struct i915_priolist *p;
-
if (i915_gem_request_completed(rq))
return;
__i915_gem_request_unsubmit(rq);
unwind_wa_tail(rq);
- p = lookup_priolist(engine,
- &rq->priotree,
- rq->priotree.priority);
- list_add(&rq->priotree.link,
- &ptr_mask_bits(p, 1)->requests);
+ GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID);
+ if (rq->priotree.priority != last_prio) {
+ p = lookup_priolist(engine,
+ &rq->priotree,
+ rq->priotree.priority);
+ p = ptr_mask_bits(p, 1);
+
+ last_prio = rq->priotree.priority;
+ }
+
+ list_add(&rq->priotree.link, &p->requests);
}
}
--
2.14.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related	[flat|nested] 30+ messages in thread
* [PATCH v3 05/13] drm/i915/preempt: Fix WaEnablePreemptionGranularityControlByUMD
2017-09-28 19:38 [PATCH v3 01/13] drm/i915: Inherit Kabylake platform features from Skylake Chris Wilson
` (2 preceding siblings ...)
2017-09-28 19:39 ` [PATCH v3 04/13] drm/i915/execlists: Cache the last priolist lookup Chris Wilson
@ 2017-09-28 19:39 ` Chris Wilson
2017-09-28 19:39 ` [PATCH v3 06/13] drm/i915/preempt: Default to disabled mid-command preemption levels Chris Wilson
` (12 subsequent siblings)
16 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2017-09-28 19:39 UTC (permalink / raw)
To: intel-gfx
From: Jeff McGee <jeff.mcgee@intel.com>
The WA applies to all production Gen9 and requires both enabling and
whitelisting of the per-context preemption control register.
v2: Extend to Cannonlake.
Signed-off-by: Jeff McGee <jeff.mcgee@intel.com>
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
drivers/gpu/drm/i915/intel_engine_cs.c | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index a28e2a864cf1..040c85e496fa 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1076,8 +1076,10 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine)
if (ret)
return ret;
- /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl */
- ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
+ /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
+ I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
+ _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
+ ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
if (ret)
return ret;
@@ -1139,14 +1141,6 @@ static int skl_init_workarounds(struct intel_engine_cs *engine)
if (ret)
return ret;
- /*
- * Actual WA is to disable percontext preemption granularity control
- * until D0 which is the default case so this is equivalent to
- * !WaDisablePerCtxtPreemptionGranularityControl:skl
- */
- I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
- _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
-
/* WaEnableGapsTsvCreditFix:skl */
I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
GEN9_GAPS_TSV_CREDIT_DISABLE));
@@ -1279,6 +1273,8 @@ static int cnl_init_workarounds(struct intel_engine_cs *engine)
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
/* WaEnablePreemptionGranularityControlByUMD:cnl */
+ I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
+ _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
if (ret)
return ret;
--
2.14.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related	[flat|nested] 30+ messages in thread
* [PATCH v3 06/13] drm/i915/preempt: Default to disabled mid-command preemption levels
2017-09-28 19:38 [PATCH v3 01/13] drm/i915: Inherit Kabylake platform features from Skylake Chris Wilson
` (3 preceding siblings ...)
2017-09-28 19:39 ` [PATCH v3 05/13] drm/i915/preempt: Fix WaEnablePreemptionGranularityControlByUMD Chris Wilson
@ 2017-09-28 19:39 ` Chris Wilson
2017-09-29 7:18 ` Joonas Lahtinen
2017-09-28 19:39 ` [PATCH v3 07/13] drm/i915/execlists: Distinguish the incomplete context notifies Chris Wilson
` (11 subsequent siblings)
16 siblings, 1 reply; 30+ messages in thread
From: Chris Wilson @ 2017-09-28 19:39 UTC (permalink / raw)
To: intel-gfx
From: Michał Winiarski <michal.winiarski@intel.com>
Supporting fine-granularity preemption levels may require changes in
userspace batch buffer programming. Therefore, we need to fallback to
safe default values, rather than use hardware defaults. Userspace is
still able to enable fine-granularity, since we're whitelisting the
register controlling it in WaEnablePreemptionGranularityControlByUMD.
v2: Extend w/a to cover Cannonlake
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
drivers/gpu/drm/i915/i915_reg.h | 6 ++++++
drivers/gpu/drm/i915/intel_engine_cs.c | 25 +++++++++++++++++++++++++
2 files changed, 31 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index ee0d4f14ac98..70465320a0ed 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -6993,6 +6993,12 @@ enum {
#define GEN9_CS_DEBUG_MODE1 _MMIO(0x20ec)
#define GEN9_CTX_PREEMPT_REG _MMIO(0x2248)
#define GEN8_CS_CHICKEN1 _MMIO(0x2580)
+#define GEN9_PREEMPT_3D_OBJECT_LEVEL (1<<0)
+#define GEN9_PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1))
+#define GEN9_PREEMPT_GPGPU_MID_THREAD_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(0, 0)
+#define GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(0, 1)
+#define GEN9_PREEMPT_GPGPU_COMMAND_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(1, 0)
+#define GEN9_PREEMPT_GPGPU_LEVEL_MASK GEN9_PREEMPT_GPGPU_LEVEL(1, 1)
/* GEN7 chicken */
#define GEN7_COMMON_SLICE_CHICKEN1 _MMIO(0x7010)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 040c85e496fa..2460d78581b4 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1071,6 +1071,24 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine)
I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
GEN8_LQSC_FLUSH_COHERENT_LINES));
+ /*
+ * Supporting preemption with fine-granularity requires changes in the
+ * batch buffer programming. Since we can't break old userspace, we
+ * need to set our default preemption level to safe value. Userspace is
+ * still able to use more fine-grained preemption levels, since in
+ * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
+ * per-ctx register. As such, WaDisableMidCmdPreemption is not a real
+ * HW workaround, but merely a way to start using preemption while
+ * maintaining old contract with userspace.
+ */
+
+ /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
+ WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
+
+ /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
+ WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+ GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
+
/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
if (ret)
@@ -1272,6 +1290,13 @@ static int cnl_init_workarounds(struct intel_engine_cs *engine)
/* FtrEnableFastAnisoL1BankingFix: cnl */
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
+ /* WaDisable3DMidCmdPreemption:cnl */
+ WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
+
+ /* WaDisableGPGPUMidCmdPreemption:cnl */
+ WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+ GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
+
/* WaEnablePreemptionGranularityControlByUMD:cnl */
I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
_MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
--
2.14.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related	[flat|nested] 30+ messages in thread
* [PATCH v3 07/13] drm/i915/execlists: Distinguish the incomplete context notifies
2017-09-28 19:38 [PATCH v3 01/13] drm/i915: Inherit Kabylake platform features from Skylake Chris Wilson
` (4 preceding siblings ...)
2017-09-28 19:39 ` [PATCH v3 06/13] drm/i915/preempt: Default to disabled mid-command preemption levels Chris Wilson
@ 2017-09-28 19:39 ` Chris Wilson
2017-09-28 19:39 ` [PATCH v3 08/13] drm/i915: Introduce a preempt context Chris Wilson
` (10 subsequent siblings)
16 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2017-09-28 19:39 UTC (permalink / raw)
To: intel-gfx
Let the listener know that the context we just scheduled out was not
complete, and will be scheduled back in at a later point.
v2: Handle CONTEXT_STATUS_PREEMPTED in gvt by aliasing it to
CONTEXT_STATUS_OUT for the moment, gvt can expand upon the difference
later.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: "Zhenyu Wang" <zhenyuw@linux.intel.com>
Cc: "Wang, Zhi A" <zhi.a.wang@intel.com>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
drivers/gpu/drm/i915/gvt/scheduler.c | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 2 +-
drivers/gpu/drm/i915/intel_lrc.h | 1 +
3 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index d5892d24f0b6..f6ded475bb2c 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -174,6 +174,7 @@ static int shadow_context_status_change(struct notifier_block *nb,
atomic_set(&workload->shadow_ctx_active, 1);
break;
case INTEL_CONTEXT_SCHEDULE_OUT:
+ case INTEL_CONTEXT_SCHEDULE_PREEMPTED:
atomic_set(&workload->shadow_ctx_active, 0);
break;
default:
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7d6da130b184..0f6c839d65a1 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -618,7 +618,7 @@ execlist_cancel_port_requests(struct intel_engine_execlists *execlists)
while (num_ports-- && port_isset(port)) {
struct drm_i915_gem_request *rq = port_request(port);
- execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+ execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED);
i915_gem_request_put(rq);
memset(port, 0, sizeof(*port));
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 314adee7127a..689fde1a63a9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -61,6 +61,7 @@
enum {
INTEL_CONTEXT_SCHEDULE_IN = 0,
INTEL_CONTEXT_SCHEDULE_OUT,
+ INTEL_CONTEXT_SCHEDULE_PREEMPTED,
};
/* Logical Rings */
--
2.14.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related	[flat|nested] 30+ messages in thread
* [PATCH v3 08/13] drm/i915: Introduce a preempt context
2017-09-28 19:38 [PATCH v3 01/13] drm/i915: Inherit Kabylake platform features from Skylake Chris Wilson
` (5 preceding siblings ...)
2017-09-28 19:39 ` [PATCH v3 07/13] drm/i915/execlists: Distinguish the incomplete context notifies Chris Wilson
@ 2017-09-28 19:39 ` Chris Wilson
2017-09-29 7:23 ` Joonas Lahtinen
2017-09-28 19:39 ` [PATCH v3 09/13] drm/i915/execlists: Move bdw GPGPU w/a to emit_bb Chris Wilson
` (9 subsequent siblings)
16 siblings, 1 reply; 30+ messages in thread
From: Chris Wilson @ 2017-09-28 19:39 UTC (permalink / raw)
To: intel-gfx
Add another perma-pinned context for using for preemption at any time.
We cannot just reuse the existing kernel context, as first and foremost
we need to ensure that we can preempt the kernel context itself, so
require a distinct context id. Similar to the kernel context, we may
want to interrupt execution and switch to the preempt context at any
time, and so it needs to be permanently pinned and available.
To compensate for yet another permanent allocation, we shrink the
existing context and the new context by reducing their ringbuffer to the
minimum.
v2: Assert that we never allocate a request from the preemption context.
v3: Limit perma-pin to engines that may preempt.
v4: Onion cleanup for early driver death
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 6 ++-
drivers/gpu/drm/i915/i915_gem_context.c | 76 ++++++++++++++++++++++++---------
drivers/gpu/drm/i915/i915_gem_request.c | 7 +++
drivers/gpu/drm/i915/intel_engine_cs.c | 22 +++++++++-
4 files changed, 87 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8e4280838f14..b08114116654 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -783,6 +783,7 @@ struct intel_csr {
func(has_l3_dpf); \
func(has_llc); \
func(has_logical_ring_contexts); \
+ func(has_logical_ring_preemption); \
func(has_overlay); \
func(has_pipe_cxsr); \
func(has_pooled_eu); \
@@ -2251,8 +2252,11 @@ struct drm_i915_private {
wait_queue_head_t gmbus_wait_queue;
struct pci_dev *bridge_dev;
- struct i915_gem_context *kernel_context;
struct intel_engine_cs *engine[I915_NUM_ENGINES];
+ /* Context used internally to idle the GPU and setup initial state */
+ struct i915_gem_context *kernel_context;
+ /* Context only to be used for injecting preemption commands */
+ struct i915_gem_context *preempt_context;
struct i915_vma *semaphore;
struct drm_dma_handle *status_page_dmah;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 921ee369c74d..f299b0a7c765 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -416,14 +416,43 @@ i915_gem_context_create_gvt(struct drm_device *dev)
return ctx;
}
+static struct i915_gem_context *
+create_kernel_context(struct drm_i915_private *i915, int prio)
+{
+ struct i915_gem_context *ctx;
+
+ ctx = i915_gem_create_context(i915, NULL);
+ if (IS_ERR(ctx))
+ return ctx;
+
+ i915_gem_context_clear_bannable(ctx);
+ ctx->priority = prio;
+ ctx->ring_size = PAGE_SIZE;
+
+ GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
+
+ return ctx;
+}
+
+static void
+destroy_kernel_context(struct i915_gem_context **ctxp)
+{
+ struct i915_gem_context *ctx;
+
+ /* Keep the context ref so that we can free it immediately ourselves */
+ ctx = i915_gem_context_get(fetch_and_zero(ctxp));
+ GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
+
+ context_close(ctx);
+ i915_gem_context_free(ctx);
+}
+
int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
{
struct i915_gem_context *ctx;
+ int err;
- /* Init should only be called once per module load. Eventually the
- * restriction on the context_disabled check can be loosened. */
- if (WARN_ON(dev_priv->kernel_context))
- return 0;
+ GEM_BUG_ON(dev_priv->kernel_context);
INIT_LIST_HEAD(&dev_priv->contexts.list);
INIT_WORK(&dev_priv->contexts.free_work, contexts_free_worker);
@@ -441,28 +470,38 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
ida_init(&dev_priv->contexts.hw_ida);
- ctx = i915_gem_create_context(dev_priv, NULL);
+ /* lowest priority; idle task */
+ ctx = create_kernel_context(dev_priv, I915_PRIORITY_MIN);
if (IS_ERR(ctx)) {
- DRM_ERROR("Failed to create default global context (error %ld)\n",
- PTR_ERR(ctx));
- return PTR_ERR(ctx);
+ DRM_ERROR("Failed to create default global context\n");
+ err = PTR_ERR(ctx);
+ goto err;
}
-
- /* For easy recognisablity, we want the kernel context to be 0 and then
+ /*
+ * For easy recognisablity, we want the kernel context to be 0 and then
* all user contexts will have non-zero hw_id.
*/
GEM_BUG_ON(ctx->hw_id);
-
- i915_gem_context_clear_bannable(ctx);
- ctx->priority = I915_PRIORITY_MIN; /* lowest priority; idle task */
dev_priv->kernel_context = ctx;
- GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
+ /* highest priority; preempting task */
+ ctx = create_kernel_context(dev_priv, INT_MAX);
+ if (IS_ERR(ctx)) {
+ DRM_ERROR("Failed to create default preempt context\n");
+ err = PTR_ERR(ctx);
+ goto err_kernel_context;
+ }
+ dev_priv->preempt_context = ctx;
DRM_DEBUG_DRIVER("%s context support initialized\n",
dev_priv->engine[RCS]->context_size ? "logical" :
"fake");
return 0;
+
+err_kernel_context:
+ destroy_kernel_context(&dev_priv->kernel_context);
+err:
+ return err;
}
void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
@@ -507,15 +546,10 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
void i915_gem_contexts_fini(struct drm_i915_private *i915)
{
- struct i915_gem_context *ctx;
-
lockdep_assert_held(&i915->drm.struct_mutex);
- /* Keep the context so that we can free it immediately ourselves */
- ctx = i915_gem_context_get(fetch_and_zero(&i915->kernel_context));
- GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
- context_close(ctx);
- i915_gem_context_free(ctx);
+ destroy_kernel_context(&i915->kernel_context);
+ destroy_kernel_context(&i915->preempt_context);
/* Must free all deferred contexts (via flush_workqueue) first */
ida_destroy(&i915->contexts.hw_ida);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 14956d899911..b100b38f1dd2 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -587,6 +587,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
lockdep_assert_held(&dev_priv->drm.struct_mutex);
+ /*
+ * Preempt contexts are reserved for exclusive use to inject a
+ * preemption context switch. They are never to be used for any trivial
+ * request!
+ */
+ GEM_BUG_ON(ctx == dev_priv->preempt_context);
+
/* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
* EIO if the GPU is already wedged.
*/
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 2460d78581b4..37d1fd9aaa56 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -613,9 +613,22 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
if (IS_ERR(ring))
return PTR_ERR(ring);
+ /*
+ * Similarly the preempt context must always be available so that
+ * we can interrupt the engine at any time.
+ */
+ if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) {
+ ring = engine->context_pin(engine,
+ engine->i915->preempt_context);
+ if (IS_ERR(ring)) {
+ ret = PTR_ERR(ring);
+ goto err_unpin_kernel;
+ }
+ }
+
ret = intel_engine_init_breadcrumbs(engine);
if (ret)
- goto err_unpin;
+ goto err_unpin_preempt;
ret = i915_gem_render_state_init(engine);
if (ret)
@@ -634,7 +647,10 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
i915_gem_render_state_fini(engine);
err_breadcrumbs:
intel_engine_fini_breadcrumbs(engine);
-err_unpin:
+err_unpin_preempt:
+ if (INTEL_INFO(engine->i915)->has_logical_ring_preemption)
+ engine->context_unpin(engine, engine->i915->preempt_context);
+err_unpin_kernel:
engine->context_unpin(engine, engine->i915->kernel_context);
return ret;
}
@@ -660,6 +676,8 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
intel_engine_cleanup_cmd_parser(engine);
i915_gem_batch_pool_fini(&engine->batch_pool);
+ if (INTEL_INFO(engine->i915)->has_logical_ring_preemption)
+ engine->context_unpin(engine, engine->i915->preempt_context);
engine->context_unpin(engine, engine->i915->kernel_context);
}
--
2.14.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 30+ messages in thread* Re: [PATCH v3 08/13] drm/i915: Introduce a preempt context
2017-09-28 19:39 ` [PATCH v3 08/13] drm/i915: Introduce a preempt context Chris Wilson
@ 2017-09-29 7:23 ` Joonas Lahtinen
0 siblings, 0 replies; 30+ messages in thread
From: Joonas Lahtinen @ 2017-09-29 7:23 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
On Thu, 2017-09-28 at 20:39 +0100, Chris Wilson wrote:
> Add another perma-pinned context for use for preemption at any time.
> We cannot just reuse the existing kernel context, as first and foremost
> we need to ensure that we can preempt the kernel context itself, so
> require a distinct context id. Similar to the kernel context, we may
> want to interrupt execution and switch to the preempt context at any
> time, and so it needs to be permanently pinned and available.
>
> To compensate for yet another permanent allocation, we shrink the
> existing context and the new context by reducing their ringbuffer to the
> minimum.
>
> v2: Assert that we never allocate a request from the preemption context.
> v3: Limit perma-pin to engines that may preempt.
> v4: Onion cleanup for early driver death
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>
<SNIP>
> @@ -507,15 +546,10 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
>
> void i915_gem_contexts_fini(struct drm_i915_private *i915)
> {
> - struct i915_gem_context *ctx;
> -
> lockdep_assert_held(&i915->drm.struct_mutex);
>
> - /* Keep the context so that we can free it immediately ourselves */
> - ctx = i915_gem_context_get(fetch_and_zero(&i915->kernel_context));
> - GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
> - context_close(ctx);
> - i915_gem_context_free(ctx);
> + destroy_kernel_context(&i915->kernel_context);
> + destroy_kernel_context(&i915->preempt_context);
You first create kernel, then pre-empt, destroy should be in the
reverse order, really.
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Regards, Joonas
--
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH v3 09/13] drm/i915/execlists: Move bdw GPGPU w/a to emit_bb
2017-09-28 19:38 [PATCH v3 01/13] drm/i915: Inherit Kabylake platform features from Skylake Chris Wilson
` (6 preceding siblings ...)
2017-09-28 19:39 ` [PATCH v3 08/13] drm/i915: Introduce a preempt context Chris Wilson
@ 2017-09-28 19:39 ` Chris Wilson
2017-09-28 19:39 ` [PATCH v3 10/13] drm/i915/execlists: Keep request->priority for its lifetime Chris Wilson
` (8 subsequent siblings)
16 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2017-09-28 19:39 UTC (permalink / raw)
To: intel-gfx
Move the re-enabling of MI arbitration from a per-bb w/a buffer to the
emission of the batch buffer itself.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
drivers/gpu/drm/i915/intel_lrc.c | 24 ++++--------------------
1 file changed, 4 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 0f6c839d65a1..e5b470ce43e8 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1159,24 +1159,6 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
return batch;
}
-/*
- * This batch is started immediately after indirect_ctx batch. Since we ensure
- * that indirect_ctx ends on a cacheline this batch is aligned automatically.
- *
- * The number of DWORDS written are returned using this field.
- *
- * This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding
- * to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant.
- */
-static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
-{
- /* WaDisableCtxRestoreArbitration:bdw,chv */
- *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
- *batch++ = MI_BATCH_BUFFER_END;
-
- return batch;
-}
-
static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
@@ -1291,7 +1273,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
break;
case 8:
wa_bb_fn[0] = gen8_init_indirectctx_bb;
- wa_bb_fn[1] = gen8_init_perctx_bb;
+ wa_bb_fn[1] = NULL;
break;
default:
MISSING_CASE(INTEL_GEN(engine->i915));
@@ -1535,13 +1517,15 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
if (IS_ERR(cs))
return PTR_ERR(cs);
+ /* WaDisableCtxRestoreArbitration:bdw,chv */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
/* FIXME(BDW): Address space and security selectors. */
*cs++ = MI_BATCH_BUFFER_START_GEN8 |
(flags & I915_DISPATCH_SECURE ? 0 : BIT(8)) |
(flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
*cs++ = lower_32_bits(offset);
*cs++ = upper_32_bits(offset);
- *cs++ = MI_NOOP;
intel_ring_advance(req, cs);
return 0;
--
2.14.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH v3 10/13] drm/i915/execlists: Keep request->priority for its lifetime
2017-09-28 19:38 [PATCH v3 01/13] drm/i915: Inherit Kabylake platform features from Skylake Chris Wilson
` (7 preceding siblings ...)
2017-09-28 19:39 ` [PATCH v3 09/13] drm/i915/execlists: Move bdw GPGPU w/a to emit_bb Chris Wilson
@ 2017-09-28 19:39 ` Chris Wilson
2017-09-28 19:39 ` [PATCH v3 11/13] drm/i915: Expand I915_PARAM_HAS_SCHEDULER into a capability bitmask Chris Wilson
` (7 subsequent siblings)
16 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2017-09-28 19:39 UTC (permalink / raw)
To: intel-gfx
With preemption, we will want to "unsubmit" a request, taking it back
from the hw and returning it to the priority sorted execution list. In
order to know where to insert it into that list, we need to remember
its adjusted priority (which may change even as it was being executed).
This also affects reset for execlists as we are now unsubmitting the
requests following the reset (rather than directly writing the ELSP for
the inflight contexts). This turns reset into an accidental preemption
point, as after the reset we may choose a different pair of contexts to
submit to hw.
GuC is not updated as this series doesn't add preemption to the GuC
submission, and so it can keep benefiting from the early pruning of the
DFS inside execlists_schedule() for a little longer. We also need to
find a way of reducing the cost of that DFS...
v2: Include priority in error-state
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 2 ++
drivers/gpu/drm/i915/i915_gpu_error.c | 10 ++++++----
drivers/gpu/drm/i915/intel_lrc.c | 14 ++++++++++----
3 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b08114116654..85c5b3c707c0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -982,6 +982,7 @@ struct i915_gpu_state {
pid_t pid;
u32 handle;
u32 hw_id;
+ int priority;
int ban_score;
int active;
int guilty;
@@ -1004,6 +1005,7 @@ struct i915_gpu_state {
long jiffies;
pid_t pid;
u32 context;
+ int priority;
int ban_score;
u32 seqno;
u32 head;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index c14552ab270b..dc91b32d699e 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -377,9 +377,9 @@ static void error_print_request(struct drm_i915_error_state_buf *m,
if (!erq->seqno)
return;
- err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, emitted %dms ago, head %08x, tail %08x\n",
+ err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms ago, head %08x, tail %08x\n",
prefix, erq->pid, erq->ban_score,
- erq->context, erq->seqno,
+ erq->context, erq->seqno, erq->priority,
jiffies_to_msecs(jiffies - erq->jiffies),
erq->head, erq->tail);
}
@@ -388,9 +388,9 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
const char *header,
const struct drm_i915_error_context *ctx)
{
- err_printf(m, "%s%s[%d] user_handle %d hw_id %d, ban score %d guilty %d active %d\n",
+ err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d guilty %d active %d\n",
header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
- ctx->ban_score, ctx->guilty, ctx->active);
+ ctx->priority, ctx->ban_score, ctx->guilty, ctx->active);
}
static void error_print_engine(struct drm_i915_error_state_buf *m,
@@ -1271,6 +1271,7 @@ static void record_request(struct drm_i915_gem_request *request,
struct drm_i915_error_request *erq)
{
erq->context = request->ctx->hw_id;
+ erq->priority = request->priotree.priority;
erq->ban_score = atomic_read(&request->ctx->ban_score);
erq->seqno = request->global_seqno;
erq->jiffies = request->emitted_jiffies;
@@ -1364,6 +1365,7 @@ static void record_context(struct drm_i915_error_context *e,
e->handle = ctx->user_handle;
e->hw_id = ctx->hw_id;
+ e->priority = ctx->priority;
e->ban_score = atomic_read(&ctx->ban_score);
e->guilty = atomic_read(&ctx->guilty_count);
e->active = atomic_read(&ctx->active_count);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e5b470ce43e8..02ea1e4e098b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -585,8 +585,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
}
INIT_LIST_HEAD(&rq->priotree.link);
- rq->priotree.priority = INT_MAX;
-
__i915_gem_request_submit(rq);
trace_i915_gem_request_in(rq, port_index(port, execlists));
last = rq;
@@ -794,6 +792,7 @@ static void intel_lrc_irq_handler(unsigned long data)
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
trace_i915_gem_request_out(rq);
+ rq->priotree.priority = INT_MAX;
i915_gem_request_put(rq);
execlists_port_complete(execlists, port);
@@ -846,11 +845,15 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
spin_unlock_irqrestore(&engine->timeline->lock, flags);
}
+static struct drm_i915_gem_request *pt_to_request(struct i915_priotree *pt)
+{
+ return container_of(pt, struct drm_i915_gem_request, priotree);
+}
+
static struct intel_engine_cs *
pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
{
- struct intel_engine_cs *engine =
- container_of(pt, struct drm_i915_gem_request, priotree)->engine;
+ struct intel_engine_cs *engine = pt_to_request(pt)->engine;
GEM_BUG_ON(!locked);
@@ -906,6 +909,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
* engines.
*/
list_for_each_entry(p, &pt->signalers_list, signal_link) {
+ if (i915_gem_request_completed(pt_to_request(p->signaler)))
+ continue;
+
GEM_BUG_ON(p->signaler->priority < pt->priority);
if (prio > READ_ONCE(p->signaler->priority))
list_move_tail(&p->dfs_link, &dfs);
--
2.14.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH v3 11/13] drm/i915: Expand I915_PARAM_HAS_SCHEDULER into a capability bitmask
2017-09-28 19:38 [PATCH v3 01/13] drm/i915: Inherit Kabylake platform features from Skylake Chris Wilson
` (8 preceding siblings ...)
2017-09-28 19:39 ` [PATCH v3 10/13] drm/i915/execlists: Keep request->priority for its lifetime Chris Wilson
@ 2017-09-28 19:39 ` Chris Wilson
2017-09-28 19:39 ` [PATCH v3 12/13] drm/i915/execlists: Preemption! Chris Wilson
` (6 subsequent siblings)
16 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2017-09-28 19:39 UTC (permalink / raw)
To: intel-gfx
In the next few patches, we wish to enable different features for the
scheduler, some which may subtlety change ABI (e.g. allow requests to be
reordered under different circumstances). So we need to make sure
userspace is cognizant of the changes (if they care), by which we employ
the usual method of a GETPARAM. We already have an
I915_PARAM_HAS_SCHEDULER (which notes the existing ability to reorder
requests to avoid bubbles), and now we wish to extend that to be a
bitmask to describe the different capabilities implemented.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
drivers/gpu/drm/i915/i915_drv.c | 5 +++--
include/uapi/drm/i915_drm.h | 9 ++++++++-
2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 59ac9199b35d..c5070f859c66 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -367,8 +367,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
value = i915_gem_mmap_gtt_version();
break;
case I915_PARAM_HAS_SCHEDULER:
- value = dev_priv->engine[RCS] &&
- dev_priv->engine[RCS]->schedule;
+ value = 0;
+ if (dev_priv->engine[RCS] && dev_priv->engine[RCS]->schedule)
+ value |= I915_SCHEDULER_CAP_ENABLED;
break;
case I915_PARAM_MMAP_VERSION:
/* Remember to bump this if the version changes! */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index fe25a01c81f2..aa4a3b20ef6b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -397,10 +397,17 @@ typedef struct drm_i915_irq_wait {
#define I915_PARAM_MIN_EU_IN_POOL 39
#define I915_PARAM_MMAP_GTT_VERSION 40
-/* Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution
+/*
+ * Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution
* priorities and the driver will attempt to execute batches in priority order.
+ * The param returns a capability bitmask, nonzero implies that the scheduler
+ * is enabled, with different features present according to the mask.
*/
#define I915_PARAM_HAS_SCHEDULER 41
+#define I915_SCHEDULER_CAP_ENABLED (1ul << 0)
+#define I915_SCHEDULER_CAP_PRIORITY (1ul << 1)
+#define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2)
+
#define I915_PARAM_HUC_STATUS 42
/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of
--
2.14.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH v3 12/13] drm/i915/execlists: Preemption!
2017-09-28 19:38 [PATCH v3 01/13] drm/i915: Inherit Kabylake platform features from Skylake Chris Wilson
` (9 preceding siblings ...)
2017-09-28 19:39 ` [PATCH v3 11/13] drm/i915: Expand I915_PARAM_HAS_SCHEDULER into a capability bitmask Chris Wilson
@ 2017-09-28 19:39 ` Chris Wilson
2017-09-29 6:55 ` Mika Kuoppala
2017-09-29 7:29 ` Joonas Lahtinen
2017-09-28 19:39 ` [PATCH v3 13/13] drm/i915/scheduler: Support user-defined priorities Chris Wilson
` (5 subsequent siblings)
16 siblings, 2 replies; 30+ messages in thread
From: Chris Wilson @ 2017-09-28 19:39 UTC (permalink / raw)
To: intel-gfx; +Cc: Ben Widawsky, Mika Kuoppala
When we write to ELSP, it triggers a context preemption at the earliest
arbitration point (3DPRIMITIVE, some PIPECONTROLs, a few other
operations and the explicit MI_ARB_CHECK). If this is to the same
context, it triggers a LITE_RESTORE where the RING_TAIL is merely
updated (used currently to chain requests from the same context
together, avoiding bubbles). However, if it is to a different context, a
full context-switch is performed and it will start to execute the new
context saving the image of the old for later execution.
Previously we avoided preemption by only submitting a new context when
the old was idle. But now we wish to embrace it, and if the new request has
a higher priority than the currently executing request, we write to the
ELSP regardless, thus triggering preemption, but we tell the GPU to
switch to our special preemption context (not the target). In the
context-switch interrupt handler, we know that the previous contexts
have finished execution and so can unwind all the incomplete requests
and compute the new highest priority request to execute.
It would be feasible to avoid the switch-to-idle intermediate by
programming the ELSP with the target context. The difficulty is in
tracking which request that should be whilst maintaining the dependency
change, the error comes in with coalesced requests. As we only track the
most recent request and its priority, we may run into the issue of being
tricked in preempting a high priority request that was followed by a
low priority request from the same context (e.g. for PI); worse still
that earlier request may be our own dependency and the order then broken
by preemption. By injecting the switch-to-idle and then recomputing the
priority queue, we avoid the issue with tracking in-flight coalesced
requests. Having tried the preempt-to-busy approach, and failed to find
a way around the coalesced priority issue, Michal's original proposal to
inject an idle context (based on handling GuC preemption) succeeds.
The current heuristic for deciding when to preempt is only if the new
request is of higher priority, and has the privileged priority of
greater than 0. Note that the scheduler remains unfair!
v2: Disable for gen8 (bdw/bsw) as we need additional w/a for GPGPU.
Since, the feature is now conditional and not always available when we
have a scheduler, make it known via the HAS_SCHEDULER GETPARAM (now a
capability mask).
v3: Stylistic tweaks.
Suggested-by: Michal Winiarski <michal.winiarski@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Zhi Wang <zhi.a.wang@intel.com>
---
drivers/gpu/drm/i915/i915_drv.c | 9 ++-
drivers/gpu/drm/i915/i915_irq.c | 6 +-
drivers/gpu/drm/i915/i915_pci.c | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 137 ++++++++++++++++++++++++--------
drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +
5 files changed, 119 insertions(+), 36 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index c5070f859c66..a3bb352a581b 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -368,9 +368,16 @@ static int i915_getparam(struct drm_device *dev, void *data,
break;
case I915_PARAM_HAS_SCHEDULER:
value = 0;
- if (dev_priv->engine[RCS] && dev_priv->engine[RCS]->schedule)
+ if (dev_priv->engine[RCS] && dev_priv->engine[RCS]->schedule) {
value |= I915_SCHEDULER_CAP_ENABLED;
+
+ if (INTEL_INFO(dev_priv)->has_logical_ring_preemption &&
+ i915_modparams.enable_execlists &&
+ !i915_modparams.enable_guc_submission)
+ value |= I915_SCHEDULER_CAP_PREEMPTION;
+ }
break;
+
case I915_PARAM_MMAP_VERSION:
/* Remember to bump this if the version changes! */
case I915_PARAM_HAS_GEM:
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index efd7827ff181..8620fbd22c55 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1382,10 +1382,8 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
bool tasklet = false;
if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
- if (port_count(&execlists->port[0])) {
- __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
- tasklet = true;
- }
+ __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+ tasklet = true;
}
if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 01d4b569b2cc..d11c1f5f5a97 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -470,6 +470,7 @@ static const struct intel_device_info intel_skylake_gt4_info __initconst = {
.has_rc6 = 1, \
.has_dp_mst = 1, \
.has_logical_ring_contexts = 1, \
+ .has_logical_ring_preemption = 1, \
.has_guc = 1, \
.has_aliasing_ppgtt = 1, \
.has_full_ppgtt = 1, \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 02ea1e4e098b..bc3fc4cd039e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -208,9 +208,9 @@
/* Typical size of the average request (2 pipecontrols and a MI_BB) */
#define EXECLISTS_REQUEST_SIZE 64 /* bytes */
-
#define WA_TAIL_DWORDS 2
#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
+#define PREEMPT_ID 0x1
static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
struct intel_engine_cs *engine);
@@ -430,6 +430,12 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
return ce->lrc_desc;
}
+static inline void elsp_write(u64 desc, u32 __iomem *elsp)
+{
+ writel(upper_32_bits(desc), elsp);
+ writel(lower_32_bits(desc), elsp);
+}
+
static void execlists_submit_ports(struct intel_engine_cs *engine)
{
struct execlist_port *port = engine->execlists.port;
@@ -455,8 +461,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
desc = 0;
}
- writel(upper_32_bits(desc), elsp);
- writel(lower_32_bits(desc), elsp);
+ elsp_write(desc, elsp);
}
}
@@ -489,26 +494,43 @@ static void port_assign(struct execlist_port *port,
port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
}
+static void inject_preempt_context(struct intel_engine_cs *engine)
+{
+ struct intel_context *ce =
+ &engine->i915->preempt_context->engine[engine->id];
+ u32 __iomem *elsp =
+ engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+ unsigned int n;
+
+ GEM_BUG_ON(engine->i915->preempt_context->hw_id != PREEMPT_ID);
+ GEM_BUG_ON(!IS_ALIGNED(ce->ring->size, WA_TAIL_BYTES));
+
+ memset(ce->ring->vaddr + ce->ring->tail, 0, WA_TAIL_BYTES);
+ ce->ring->tail += WA_TAIL_BYTES;
+ ce->ring->tail &= (ce->ring->size - 1);
+ ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail;
+
+ for (n = execlists_num_ports(&engine->execlists); --n; )
+ elsp_write(0, elsp);
+
+ elsp_write(ce->lrc_desc, elsp);
+}
+
+static bool can_preempt(struct intel_engine_cs *engine)
+{
+ return INTEL_INFO(engine->i915)->has_logical_ring_preemption;
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
- struct drm_i915_gem_request *last;
struct intel_engine_execlists * const execlists = &engine->execlists;
struct execlist_port *port = execlists->port;
const struct execlist_port * const last_port =
&execlists->port[execlists->port_mask];
+ struct drm_i915_gem_request *last = port_request(port);
struct rb_node *rb;
bool submit = false;
- last = port_request(port);
- if (last)
- /* WaIdleLiteRestore:bdw,skl
- * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
- * as we resubmit the request. See gen8_emit_breadcrumb()
- * for where we prepare the padding after the end of the
- * request.
- */
- last->tail = last->wa_tail;
-
/* Hardware submission is through 2 ports. Conceptually each port
* has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
* static for a context, and unique to each, so we only execute
@@ -533,7 +555,45 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
spin_lock_irq(&engine->timeline->lock);
rb = execlists->first;
GEM_BUG_ON(rb_first(&execlists->queue) != rb);
- while (rb) {
+ if (!rb)
+ goto unlock;
+
+ if (last) {
+ /*
+ * Don't resubmit or switch until all outstanding
+ * preemptions (lite-restore) are seen. Then we
+ * know the next preemption status we see corresponds
+ * to this ELSP update.
+ */
+ if (port_count(&port[0]) > 1)
+ goto unlock;
+
+ if (can_preempt(engine) &&
+ rb_entry(rb, struct i915_priolist, node)->priority >
+ max(last->priotree.priority, 0)) {
+ /*
+ * Switch to our empty preempt context so
+ * the state of the GPU is known (idle).
+ */
+ inject_preempt_context(engine);
+ execlists->preempt = true;
+ goto unlock;
+ } else {
+ if (port_count(&port[1]))
+ goto unlock;
+
+ /* WaIdleLiteRestore:bdw,skl
+ * Apply the wa NOOPs to prevent
+ * ring:HEAD == req:TAIL as we resubmit the
+ * request. See gen8_emit_breadcrumb() for
+ * where we prepare the padding after the
+ * end of the request.
+ */
+ last->tail = last->wa_tail;
+ }
+ }
+
+ do {
struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
struct drm_i915_gem_request *rq, *rn;
@@ -596,11 +656,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
INIT_LIST_HEAD(&p->requests);
if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p);
- }
+ } while (rb);
done:
execlists->first = rb;
if (submit)
port_assign(port, last);
+unlock:
spin_unlock_irq(&engine->timeline->lock);
if (submit)
@@ -681,13 +742,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
spin_unlock_irqrestore(&engine->timeline->lock, flags);
}
-static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
-{
- const struct execlist_port *port = engine->execlists.port;
-
- return port_count(&port[0]) + port_count(&port[1]) < 2;
-}
-
/*
* Check the unread Context Status Buffers and manage the submission of new
* contexts to the ELSP accordingly.
@@ -696,7 +750,7 @@ static void intel_lrc_irq_handler(unsigned long data)
{
struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
struct intel_engine_execlists * const execlists = &engine->execlists;
- struct execlist_port *port = execlists->port;
+ struct execlist_port * const port = execlists->port;
struct drm_i915_private *dev_priv = engine->i915;
/* We can skip acquiring intel_runtime_pm_get() here as it was taken
@@ -781,6 +835,23 @@ static void intel_lrc_irq_handler(unsigned long data)
if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
continue;
+ if (status & GEN8_CTX_STATUS_ACTIVE_IDLE &&
+ buf[2*head + 1] == PREEMPT_ID) {
+ execlist_cancel_port_requests(execlists);
+
+ spin_lock_irq(&engine->timeline->lock);
+ unwind_incomplete_requests(engine);
+ spin_unlock_irq(&engine->timeline->lock);
+
+ GEM_BUG_ON(!execlists->preempt);
+ execlists->preempt = false;
+ continue;
+ }
+
+ if (status & GEN8_CTX_STATUS_PREEMPTED &&
+ execlists->preempt)
+ continue;
+
/* Check the context/desc id for this event matches */
GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
@@ -812,7 +883,7 @@ static void intel_lrc_irq_handler(unsigned long data)
}
}
- if (execlists_elsp_ready(engine))
+ if (!execlists->preempt)
execlists_dequeue(engine);
intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
@@ -825,7 +896,7 @@ static void insert_request(struct intel_engine_cs *engine,
struct i915_priolist *p = lookup_priolist(engine, pt, prio);
list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
- if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(engine))
+ if (ptr_unmask_bits(p, 1))
tasklet_hi_schedule(&engine->execlists.irq_tasklet);
}
@@ -955,8 +1026,6 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
}
spin_unlock_irq(&engine->timeline->lock);
-
- /* XXX Do we need to preempt to make room for us and our deps? */
}
static struct intel_ring *
@@ -1152,6 +1221,8 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
i915_ggtt_offset(engine->scratch) +
2 * CACHELINE_BYTES);
+ *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
/* Pad to end of cacheline */
while ((unsigned long)batch % CACHELINE_BYTES)
*batch++ = MI_NOOP;
@@ -1167,6 +1238,8 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
+ *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
batch = gen8_emit_flush_coherentl3_wa(engine, batch);
@@ -1212,6 +1285,8 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
*batch++ = 0;
}
+ *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
/* Pad to end of cacheline */
while ((unsigned long)batch % CACHELINE_BYTES)
*batch++ = MI_NOOP;
@@ -1365,6 +1440,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
execlists->csb_head = -1;
+ execlists->preempt = false;
/* After a GPU reset, we may have requests to replay */
if (!i915_modparams.enable_guc_submission && execlists->first)
@@ -1660,7 +1736,8 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
*/
static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs)
{
- *cs++ = MI_NOOP;
+ /* Ensure there's always at least one preemption point per-request. */
+ *cs++ = MI_ARB_CHECK;
*cs++ = MI_NOOP;
request->wa_tail = intel_ring_offset(request, cs);
}
@@ -1681,7 +1758,6 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
gen8_emit_wa_tail(request, cs);
}
-
static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
@@ -1709,7 +1785,6 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
gen8_emit_wa_tail(request, cs);
}
-
static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS;
static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 56d7ae9f298b..891d9555dce6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -238,6 +238,8 @@ struct intel_engine_execlists {
#define EXECLIST_MAX_PORTS 2
} port[EXECLIST_MAX_PORTS];
+ bool preempt;
+
/**
* @port_mask: number of execlist ports - 1
*/
--
2.14.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 30+ messages in thread* Re: [PATCH v3 12/13] drm/i915/execlists: Preemption!
2017-09-28 19:39 ` [PATCH v3 12/13] drm/i915/execlists: Preemption! Chris Wilson
@ 2017-09-29 6:55 ` Mika Kuoppala
2017-09-29 10:07 ` Chris Wilson
2017-09-29 7:29 ` Joonas Lahtinen
1 sibling, 1 reply; 30+ messages in thread
From: Mika Kuoppala @ 2017-09-29 6:55 UTC (permalink / raw)
To: Chris Wilson, intel-gfx; +Cc: Ben Widawsky
Chris Wilson <chris@chris-wilson.co.uk> writes:
> When we write to ELSP, it triggers a context preemption at the earliest
> arbitration point (3DPRIMITIVE, some PIPECONTROLs, a few other
> operations and the explicit MI_ARB_CHECK). If this is to the same
> context, it triggers a LITE_RESTORE where the RING_TAIL is merely
> updated (used currently to chain requests from the same context
> together, avoiding bubbles). However, if it is to a different context, a
> full context-switch is performed and it will start to execute the new
> context saving the image of the old for later execution.
>
> Previously we avoided preemption by only submitting a new context when
> the old was idle. But now we wish to embrace it, and if the new request has
> a higher priority than the currently executing request, we write to the
> ELSP regardless, thus triggering preemption, but we tell the GPU to
> switch to our special preemption context (not the target). In the
> context-switch interrupt handler, we know that the previous contexts
> have finished execution and so can unwind all the incomplete requests
> and compute the new highest priority request to execute.
>
> It would be feasible to avoid the switch-to-idle intermediate by
> programming the ELSP with the target context. The difficulty is in
> tracking which request that should be whilst maintaining the dependency
> change, the error comes in with coalesced requests. As we only track the
> most recent request and its priority, we may run into the issue of being
> tricked into preempting a high priority request that was followed by a
> low priority request from the same context (e.g. for PI); worse still
> that earlier request may be our own dependency and the order then broken
> by preemption. By injecting the switch-to-idle and then recomputing the
> priority queue, we avoid the issue with tracking in-flight coalesced
> requests. Having tried the preempt-to-busy approach, and failed to find
> a way around the coalesced priority issue, Michal's original proposal to
> inject an idle context (based on handling GuC preemption) succeeds.
>
> The current heuristic for deciding when to preempt are only if the new
> request is of higher priority, and has the privileged priority of
> greater than 0. Note that the scheduler remains unfair!
>
> v2: Disable for gen8 (bdw/bsw) as we need additional w/a for GPGPU.
> Since, the feature is now conditional and not always available when we
> have a scheduler, make it known via the HAS_SCHEDULER GETPARAM (now a
> capability mask).
> v3: Stylistic tweaks.
>
> Suggested-by: Michal Winiarski <michal.winiarski@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michal Winiarski <michal.winiarski@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
> Cc: Zhi Wang <zhi.a.wang@intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.c | 9 ++-
> drivers/gpu/drm/i915/i915_irq.c | 6 +-
> drivers/gpu/drm/i915/i915_pci.c | 1 +
> drivers/gpu/drm/i915/intel_lrc.c | 137 ++++++++++++++++++++++++--------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +
> 5 files changed, 119 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index c5070f859c66..a3bb352a581b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -368,9 +368,16 @@ static int i915_getparam(struct drm_device *dev, void *data,
> break;
> case I915_PARAM_HAS_SCHEDULER:
> value = 0;
> - if (dev_priv->engine[RCS] && dev_priv->engine[RCS]->schedule)
> + if (dev_priv->engine[RCS] && dev_priv->engine[RCS]->schedule) {
> value |= I915_SCHEDULER_CAP_ENABLED;
> +
> + if (INTEL_INFO(dev_priv)->has_logical_ring_preemption &&
> + i915_modparams.enable_execlists &&
> + !i915_modparams.enable_guc_submission)
> + value |= I915_SCHEDULER_CAP_PREEMPTION;
> + }
> break;
> +
> case I915_PARAM_MMAP_VERSION:
> /* Remember to bump this if the version changes! */
> case I915_PARAM_HAS_GEM:
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index efd7827ff181..8620fbd22c55 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1382,10 +1382,8 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
> bool tasklet = false;
>
> if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
> - if (port_count(&execlists->port[0])) {
> - __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> - tasklet = true;
> - }
> + __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> + tasklet = true;
> }
>
> if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 01d4b569b2cc..d11c1f5f5a97 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -470,6 +470,7 @@ static const struct intel_device_info intel_skylake_gt4_info __initconst = {
> .has_rc6 = 1, \
> .has_dp_mst = 1, \
> .has_logical_ring_contexts = 1, \
> + .has_logical_ring_preemption = 1, \
> .has_guc = 1, \
> .has_aliasing_ppgtt = 1, \
> .has_full_ppgtt = 1, \
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 02ea1e4e098b..bc3fc4cd039e 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -208,9 +208,9 @@
>
> /* Typical size of the average request (2 pipecontrols and a MI_BB) */
> #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
> -
> #define WA_TAIL_DWORDS 2
> #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
> +#define PREEMPT_ID 0x1
>
> static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
> struct intel_engine_cs *engine);
> @@ -430,6 +430,12 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
> return ce->lrc_desc;
> }
>
> +static inline void elsp_write(u64 desc, u32 __iomem *elsp)
> +{
> + writel(upper_32_bits(desc), elsp);
> + writel(lower_32_bits(desc), elsp);
> +}
> +
> static void execlists_submit_ports(struct intel_engine_cs *engine)
> {
> struct execlist_port *port = engine->execlists.port;
> @@ -455,8 +461,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
> desc = 0;
> }
>
> - writel(upper_32_bits(desc), elsp);
> - writel(lower_32_bits(desc), elsp);
> + elsp_write(desc, elsp);
> }
> }
>
> @@ -489,26 +494,43 @@ static void port_assign(struct execlist_port *port,
> port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
> }
>
> +static void inject_preempt_context(struct intel_engine_cs *engine)
> +{
> + struct intel_context *ce =
> + &engine->i915->preempt_context->engine[engine->id];
> + u32 __iomem *elsp =
> + engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
> + unsigned int n;
> +
> + GEM_BUG_ON(engine->i915->preempt_context->hw_id != PREEMPT_ID);
> + GEM_BUG_ON(!IS_ALIGNED(ce->ring->size, WA_TAIL_BYTES));
> +
> + memset(ce->ring->vaddr + ce->ring->tail, 0, WA_TAIL_BYTES);
> + ce->ring->tail += WA_TAIL_BYTES;
> + ce->ring->tail &= (ce->ring->size - 1);
> + ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail;
> +
> + for (n = execlists_num_ports(&engine->execlists); --n; )
> + elsp_write(0, elsp);
> +
> + elsp_write(ce->lrc_desc, elsp);
> +}
> +
> +static bool can_preempt(struct intel_engine_cs *engine)
> +{
> + return INTEL_INFO(engine->i915)->has_logical_ring_preemption;
> +}
> +
> static void execlists_dequeue(struct intel_engine_cs *engine)
> {
> - struct drm_i915_gem_request *last;
> struct intel_engine_execlists * const execlists = &engine->execlists;
> struct execlist_port *port = execlists->port;
> const struct execlist_port * const last_port =
> &execlists->port[execlists->port_mask];
> + struct drm_i915_gem_request *last = port_request(port);
> struct rb_node *rb;
> bool submit = false;
>
> - last = port_request(port);
> - if (last)
> - /* WaIdleLiteRestore:bdw,skl
> - * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
> - * as we resubmit the request. See gen8_emit_breadcrumb()
> - * for where we prepare the padding after the end of the
> - * request.
> - */
> - last->tail = last->wa_tail;
> -
> /* Hardware submission is through 2 ports. Conceptually each port
> * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
> * static for a context, and unique to each, so we only execute
> @@ -533,7 +555,45 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> spin_lock_irq(&engine->timeline->lock);
> rb = execlists->first;
> GEM_BUG_ON(rb_first(&execlists->queue) != rb);
> - while (rb) {
> + if (!rb)
> + goto unlock;
> +
> + if (last) {
> + /*
> + * Don't resubmit or switch until all outstanding
> + * preemptions (lite-restore) are seen. Then we
> + * know the next preemption status we see corresponds
> + * to this ELSP update.
> + */
> + if (port_count(&port[0]) > 1)
> + goto unlock;
> +
> + if (can_preempt(engine) &&
> + rb_entry(rb, struct i915_priolist, node)->priority >
> + max(last->priotree.priority, 0)) {
> + /*
> + * Switch to our empty preempt context so
> + * the state of the GPU is known (idle).
> + */
> + inject_preempt_context(engine);
> + execlists->preempt = true;
> + goto unlock;
> + } else {
> + if (port_count(&port[1]))
> + goto unlock;
I am assuming that this is a check for hw
availability and nothing else?
- Mika (being paranoid)
> +
> + /* WaIdleLiteRestore:bdw,skl
> + * Apply the wa NOOPs to prevent
> + * ring:HEAD == req:TAIL as we resubmit the
> + * request. See gen8_emit_breadcrumb() for
> + * where we prepare the padding after the
> + * end of the request.
> + */
> + last->tail = last->wa_tail;
> + }
> + }
> +
> + do {
> struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> struct drm_i915_gem_request *rq, *rn;
>
> @@ -596,11 +656,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> INIT_LIST_HEAD(&p->requests);
> if (p->priority != I915_PRIORITY_NORMAL)
> kmem_cache_free(engine->i915->priorities, p);
> - }
> + } while (rb);
> done:
> execlists->first = rb;
> if (submit)
> port_assign(port, last);
> +unlock:
> spin_unlock_irq(&engine->timeline->lock);
>
> if (submit)
> @@ -681,13 +742,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
> spin_unlock_irqrestore(&engine->timeline->lock, flags);
> }
>
> -static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
> -{
> - const struct execlist_port *port = engine->execlists.port;
> -
> - return port_count(&port[0]) + port_count(&port[1]) < 2;
> -}
> -
> /*
> * Check the unread Context Status Buffers and manage the submission of new
> * contexts to the ELSP accordingly.
> @@ -696,7 +750,7 @@ static void intel_lrc_irq_handler(unsigned long data)
> {
> struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
> struct intel_engine_execlists * const execlists = &engine->execlists;
> - struct execlist_port *port = execlists->port;
> + struct execlist_port * const port = execlists->port;
> struct drm_i915_private *dev_priv = engine->i915;
>
> /* We can skip acquiring intel_runtime_pm_get() here as it was taken
> @@ -781,6 +835,23 @@ static void intel_lrc_irq_handler(unsigned long data)
> if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> continue;
>
> + if (status & GEN8_CTX_STATUS_ACTIVE_IDLE &&
> + buf[2*head + 1] == PREEMPT_ID) {
> + execlist_cancel_port_requests(execlists);
> +
> + spin_lock_irq(&engine->timeline->lock);
> + unwind_incomplete_requests(engine);
> + spin_unlock_irq(&engine->timeline->lock);
> +
> + GEM_BUG_ON(!execlists->preempt);
> + execlists->preempt = false;
> + continue;
> + }
> +
> + if (status & GEN8_CTX_STATUS_PREEMPTED &&
> + execlists->preempt)
> + continue;
> +
> /* Check the context/desc id for this event matches */
> GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
>
> @@ -812,7 +883,7 @@ static void intel_lrc_irq_handler(unsigned long data)
> }
> }
>
> - if (execlists_elsp_ready(engine))
> + if (!execlists->preempt)
> execlists_dequeue(engine);
>
> intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
> @@ -825,7 +896,7 @@ static void insert_request(struct intel_engine_cs *engine,
> struct i915_priolist *p = lookup_priolist(engine, pt, prio);
>
> list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
> - if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(engine))
> + if (ptr_unmask_bits(p, 1))
> tasklet_hi_schedule(&engine->execlists.irq_tasklet);
> }
>
> @@ -955,8 +1026,6 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
> }
>
> spin_unlock_irq(&engine->timeline->lock);
> -
> - /* XXX Do we need to preempt to make room for us and our deps? */
> }
>
> static struct intel_ring *
> @@ -1152,6 +1221,8 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
> i915_ggtt_offset(engine->scratch) +
> 2 * CACHELINE_BYTES);
>
> + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> +
> /* Pad to end of cacheline */
> while ((unsigned long)batch % CACHELINE_BYTES)
> *batch++ = MI_NOOP;
> @@ -1167,6 +1238,8 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
>
> static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
> {
> + *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
> +
> /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
> batch = gen8_emit_flush_coherentl3_wa(engine, batch);
>
> @@ -1212,6 +1285,8 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
> *batch++ = 0;
> }
>
> + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> +
> /* Pad to end of cacheline */
> while ((unsigned long)batch % CACHELINE_BYTES)
> *batch++ = MI_NOOP;
> @@ -1365,6 +1440,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
> GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
> clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> execlists->csb_head = -1;
> + execlists->preempt = false;
>
> /* After a GPU reset, we may have requests to replay */
> if (!i915_modparams.enable_guc_submission && execlists->first)
> @@ -1660,7 +1736,8 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
> */
> static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs)
> {
> - *cs++ = MI_NOOP;
> + /* Ensure there's always at least one preemption point per-request. */
> + *cs++ = MI_ARB_CHECK;
> *cs++ = MI_NOOP;
> request->wa_tail = intel_ring_offset(request, cs);
> }
> @@ -1681,7 +1758,6 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
>
> gen8_emit_wa_tail(request, cs);
> }
> -
> static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
>
> static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
> @@ -1709,7 +1785,6 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
>
> gen8_emit_wa_tail(request, cs);
> }
> -
> static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS;
>
> static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 56d7ae9f298b..891d9555dce6 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -238,6 +238,8 @@ struct intel_engine_execlists {
> #define EXECLIST_MAX_PORTS 2
> } port[EXECLIST_MAX_PORTS];
>
> + bool preempt;
> +
> /**
> * @port_mask: number of execlist ports - 1
> */
> --
> 2.14.2
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH v3 12/13] drm/i915/execlists: Preemption!
2017-09-29 6:55 ` Mika Kuoppala
@ 2017-09-29 10:07 ` Chris Wilson
0 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2017-09-29 10:07 UTC (permalink / raw)
To: Mika Kuoppala, intel-gfx; +Cc: Ben Widawsky
Quoting Mika Kuoppala (2017-09-29 07:55:45)
> > @@ -533,7 +555,45 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> > spin_lock_irq(&engine->timeline->lock);
> > rb = execlists->first;
> > GEM_BUG_ON(rb_first(&execlists->queue) != rb);
> > - while (rb) {
> > + if (!rb)
> > + goto unlock;
> > +
> > + if (last) {
> > + /*
> > + * Don't resubmit or switch until all outstanding
> > + * preemptions (lite-restore) are seen. Then we
> > + * know the next preemption status we see corresponds
> > + * to this ELSP update.
> > + */
> > + if (port_count(&port[0]) > 1)
> > + goto unlock;
> > +
> > + if (can_preempt(engine) &&
> > + rb_entry(rb, struct i915_priolist, node)->priority >
> > + max(last->priotree.priority, 0)) {
> > + /*
> > + * Switch to our empty preempt context so
> > + * the state of the GPU is known (idle).
> > + */
> > + inject_preempt_context(engine);
> > + execlists->preempt = true;
> > + goto unlock;
> > + } else {
> > + if (port_count(&port[1]))
> > + goto unlock;
>
> I am assuming that this is check for hw
> availability and nothing else?
Technically this check is that we use last = port[0], and only check
port[0] for preemption. In theory, it would be possible to coalesce new
requests onto the second port, but it's complicated. (We would need to
track the possible lite-restore on the second port, i.e. whilst
submitting there was a context switch, but we may never see that
preemption event. Then we have the complication with priorities, where
we might be coalescing a high priority request onto the second port,
losing track of it.)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v3 12/13] drm/i915/execlists: Preemption!
2017-09-28 19:39 ` [PATCH v3 12/13] drm/i915/execlists: Preemption! Chris Wilson
2017-09-29 6:55 ` Mika Kuoppala
@ 2017-09-29 7:29 ` Joonas Lahtinen
2017-09-29 10:12 ` Chris Wilson
1 sibling, 1 reply; 30+ messages in thread
From: Joonas Lahtinen @ 2017-09-29 7:29 UTC (permalink / raw)
To: Chris Wilson, intel-gfx; +Cc: Mika Kuoppala, Ben Widawsky
On Thu, 2017-09-28 at 20:39 +0100, Chris Wilson wrote:
> When we write to ELSP, it triggers a context preemption at the earliest
> arbitration point (3DPRIMITIVE, some PIPECONTROLs, a few other
> operations and the explicit MI_ARB_CHECK). If this is to the same
> context, it triggers a LITE_RESTORE where the RING_TAIL is merely
> updated (used currently to chain requests from the same context
> together, avoiding bubbles). However, if it is to a different context, a
> full context-switch is performed and it will start to execute the new
> context saving the image of the old for later execution.
>
> Previously we avoided preemption by only submitting a new context when
> the old was idle. But now we wish to embrace it, and if the new request has
> a higher priority than the currently executing request, we write to the
> ELSP regardless, thus triggering preemption, but we tell the GPU to
> switch to our special preemption context (not the target). In the
> context-switch interrupt handler, we know that the previous contexts
> have finished execution and so can unwind all the incomplete requests
> and compute the new highest priority request to execute.
>
> It would be feasible to avoid the switch-to-idle intermediate by
> programming the ELSP with the target context. The difficulty is in
> tracking which request that should be whilst maintaining the dependency
> change, the error comes in with coalesced requests. As we only track the
> most recent request and its priority, we may run into the issue of being
> tricked in preempting a high priority request that was followed by a
> low priority request from the same context (e.g. for PI); worse still
> that earlier request may be our own dependency and the order then broken
> by preemption. By injecting the switch-to-idle and then recomputing the
> priority queue, we avoid the issue with tracking in-flight coalesced
> requests. Having tried the preempt-to-busy approach, and failed to find
> a way around the coalesced priority issue, Michal's original proposal to
> inject an idle context (based on handling GuC preemption) succeeds.
>
> The current heuristic for deciding when to preempt are only if the new
> request is of higher priority, and has the privileged priority of
> greater than 0. Note that the scheduler remains unfair!
>
> v2: Disable for gen8 (bdw/bsw) as we need additional w/a for GPGPU.
> Since, the feature is now conditional and not always available when we
> have a scheduler, make it known via the HAS_SCHEDULER GETPARAM (now a
> capability mask).
> v3: Stylistic tweaks.
>
> Suggested-by: Michal Winiarski <michal.winiarski@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michal Winiarski <michal.winiarski@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
> Cc: Zhi Wang <zhi.a.wang@intel.com>
I'm still voting for "preempting" variable name + kerneldoc.
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Regards, Joonas
--
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v3 12/13] drm/i915/execlists: Preemption!
2017-09-29 7:29 ` Joonas Lahtinen
@ 2017-09-29 10:12 ` Chris Wilson
2017-09-29 13:14 ` Joonas Lahtinen
0 siblings, 1 reply; 30+ messages in thread
From: Chris Wilson @ 2017-09-29 10:12 UTC (permalink / raw)
To: Joonas Lahtinen, intel-gfx; +Cc: Mika Kuoppala, Ben Widawsky
Quoting Joonas Lahtinen (2017-09-29 08:29:57)
> On Thu, 2017-09-28 at 20:39 +0100, Chris Wilson wrote:
> > When we write to ELSP, it triggers a context preemption at the earliest
> > arbitration point (3DPRIMITIVE, some PIPECONTROLs, a few other
> > operations and the explicit MI_ARB_CHECK). If this is to the same
> > context, it triggers a LITE_RESTORE where the RING_TAIL is merely
> > updated (used currently to chain requests from the same context
> > together, avoiding bubbles). However, if it is to a different context, a
> > full context-switch is performed and it will start to execute the new
> > context saving the image of the old for later execution.
> >
> > Previously we avoided preemption by only submitting a new context when
> > the old was idle. But now we wish to embrace it, and if the new request has
> > a higher priority than the currently executing request, we write to the
> > ELSP regardless, thus triggering preemption, but we tell the GPU to
> > switch to our special preemption context (not the target). In the
> > context-switch interrupt handler, we know that the previous contexts
> > have finished execution and so can unwind all the incomplete requests
> > and compute the new highest priority request to execute.
> >
> > It would be feasible to avoid the switch-to-idle intermediate by
> > programming the ELSP with the target context. The difficulty is in
> > tracking which request that should be whilst maintaining the dependency
> > change, the error comes in with coalesced requests. As we only track the
> > most recent request and its priority, we may run into the issue of being
> > tricked in preempting a high priority request that was followed by a
> > low priority request from the same context (e.g. for PI); worse still
> > that earlier request may be our own dependency and the order then broken
> > by preemption. By injecting the switch-to-idle and then recomputing the
> > priority queue, we avoid the issue with tracking in-flight coalesced
> > requests. Having tried the preempt-to-busy approach, and failed to find
> > a way around the coalesced priority issue, Michal's original proposal to
> > inject an idle context (based on handling GuC preemption) succeeds.
> >
> > The current heuristic for deciding when to preempt are only if the new
> > request is of higher priority, and has the privileged priority of
> > greater than 0. Note that the scheduler remains unfair!
> >
> > v2: Disable for gen8 (bdw/bsw) as we need additional w/a for GPGPU.
> > Since, the feature is now conditional and not always available when we
> > have a scheduler, make it known via the HAS_SCHEDULER GETPARAM (now a
> > capability mask).
> > v3: Stylistic tweaks.
> >
> > Suggested-by: Michal Winiarski <michal.winiarski@intel.com>
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Michal Winiarski <michal.winiarski@intel.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> > Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> > Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
> > Cc: Zhi Wang <zhi.a.wang@intel.com>
>
> I'm still voting for "preempting" variable name + kerneldoc.
I'm open to suggestions, or even a list of questions that you would like
answered for @preempt[ing].
That goes for anything. If at any time anyone sees something odd or not
clear from the context while reading the code, just send a patch adding a
question. Knowing at what point the confusion arose gives us the perfect
place to address that for the next reader.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v3 12/13] drm/i915/execlists: Preemption!
2017-09-29 10:12 ` Chris Wilson
@ 2017-09-29 13:14 ` Joonas Lahtinen
0 siblings, 0 replies; 30+ messages in thread
From: Joonas Lahtinen @ 2017-09-29 13:14 UTC (permalink / raw)
To: Chris Wilson, intel-gfx; +Cc: Mika Kuoppala, Ben Widawsky
On Fri, 2017-09-29 at 11:12 +0100, Chris Wilson wrote:
> Quoting Joonas Lahtinen (2017-09-29 08:29:57)
> > On Thu, 2017-09-28 at 20:39 +0100, Chris Wilson wrote:
> > > When we write to ELSP, it triggers a context preemption at the earliest
> > > arbitration point (3DPRIMITIVE, some PIPECONTROLs, a few other
> > > operations and the explicit MI_ARB_CHECK). If this is to the same
> > > context, it triggers a LITE_RESTORE where the RING_TAIL is merely
> > > updated (used currently to chain requests from the same context
> > > together, avoiding bubbles). However, if it is to a different context, a
> > > full context-switch is performed and it will start to execute the new
> > > context saving the image of the old for later execution.
> > >
> > > Previously we avoided preemption by only submitting a new context when
> > > the old was idle. But now we wish to embrace it, and if the new request has
> > > a higher priority than the currently executing request, we write to the
> > > ELSP regardless, thus triggering preemption, but we tell the GPU to
> > > switch to our special preemption context (not the target). In the
> > > context-switch interrupt handler, we know that the previous contexts
> > > have finished execution and so can unwind all the incomplete requests
> > > and compute the new highest priority request to execute.
> > >
> > > It would be feasible to avoid the switch-to-idle intermediate by
> > > programming the ELSP with the target context. The difficulty is in
> > > tracking which request that should be whilst maintaining the dependency
> > > change, the error comes in with coalesced requests. As we only track the
> > > most recent request and its priority, we may run into the issue of being
> > > tricked in preempting a high priority request that was followed by a
> > > low priority request from the same context (e.g. for PI); worse still
> > > that earlier request may be our own dependency and the order then broken
> > > by preemption. By injecting the switch-to-idle and then recomputing the
> > > priority queue, we avoid the issue with tracking in-flight coalesced
> > > requests. Having tried the preempt-to-busy approach, and failed to find
> > > a way around the coalesced priority issue, Michal's original proposal to
> > > inject an idle context (based on handling GuC preemption) succeeds.
> > >
> > > The current heuristic for deciding when to preempt are only if the new
> > > request is of higher priority, and has the privileged priority of
> > > greater than 0. Note that the scheduler remains unfair!
> > >
> > > v2: Disable for gen8 (bdw/bsw) as we need additional w/a for GPGPU.
> > > Since, the feature is now conditional and not always available when we
> > > have a scheduler, make it known via the HAS_SCHEDULER GETPARAM (now a
> > > capability mask).
> > > v3: Stylistic tweaks.
> > >
> > > Suggested-by: Michal Winiarski <michal.winiarski@intel.com>
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: Michal Winiarski <michal.winiarski@intel.com>
> > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > > Cc: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
> > > Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> > > Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> > > Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
> > > Cc: Zhi Wang <zhi.a.wang@intel.com>
> >
> > I'm still voting for "preempting" variable name + kerneldoc.
>
> I'm open to suggestions, or even a list of questions that you would like
> answered for @preempt[ing].
>
> That goes for anything. If at any time anyone sees something odd or not
> clear from the context while reading the code, just send a patch adding a
> question. Knowing at what point the confusion arose gives us the perfect
> place to address that for the next reader.
My point of confusion was that "if (execlists->preempt)" looks lot like
a callback hook. So I was expecting execlists->preempt()... It's not a
huge point.
Regards, Joonas
--
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH v3 13/13] drm/i915/scheduler: Support user-defined priorities
2017-09-28 19:38 [PATCH v3 01/13] drm/i915: Inherit Kabylake platform features from Skylake Chris Wilson
` (10 preceding siblings ...)
2017-09-28 19:39 ` [PATCH v3 12/13] drm/i915/execlists: Preemption! Chris Wilson
@ 2017-09-28 19:39 ` Chris Wilson
2017-09-29 7:32 ` Joonas Lahtinen
2017-09-28 19:58 ` [PATCH v3 01/13] drm/i915: Inherit Kabylake platform features from Skylake Rodrigo Vivi
` (4 subsequent siblings)
16 siblings, 1 reply; 30+ messages in thread
From: Chris Wilson @ 2017-09-28 19:39 UTC (permalink / raw)
To: intel-gfx
Use a priority stored in the context as the initial value when
submitting a request. This allows us to change the default priority on a
per-context basis, allowing different contexts to be favoured with GPU
time at the expense of lower importance work. The user can adjust the
context's priority via I915_CONTEXT_PARAM_PRIORITY, with more positive
values being higher priority (they will be serviced earlier, after their
dependencies have been resolved). Any prerequisite work for an execbuf
will have its priority raised to match the new request as required.
Normal users can specify any value in the range of -1023 to 0 [default],
i.e. they can reduce the priority of their workloads (and temporarily
boost it back to normal if so desired).
Privileged users can specify any value in the range of -1023 to 1023,
[default is 0], i.e. they can raise their priority above all others and
so potentially starve the system.
Note that the existing schedulers are not fair, nor load balancing, the
execution is strictly by priority on a first-come, first-served basis,
and the driver may choose to boost some requests above the range
available to users.
This priority was originally based around nice(2), but evolved to allow
clients to adjust their priority within a small range, and allow for a
privileged high priority range.
For example, this can be used to implement EGL_IMG_context_priority
https://www.khronos.org/registry/egl/extensions/IMG/EGL_IMG_context_priority.txt
EGL_CONTEXT_PRIORITY_LEVEL_IMG determines the priority level of
the context to be created. This attribute is a hint, as an
implementation may not support multiple contexts at some
priority levels and system policy may limit access to high
priority contexts to appropriate system privilege level. The
default value for EGL_CONTEXT_PRIORITY_LEVEL_IMG is
EGL_CONTEXT_PRIORITY_MEDIUM_IMG."
so we can map
PRIORITY_HIGH -> 1023 [privileged, will fall back to 0]
PRIORITY_MED -> 0 [default]
PRIORITY_LOW -> -1023
They also map onto the priorities used by VkQueue (and a VkQueue is
essentially a timeline, our i915_gem_context under full-ppgtt).
v2: s/CAP_SYS_ADMIN/CAP_SYS_NICE/
v3: Report min/max user priorities as defines in the uapi, and rebase
internal priorities on the exposed values.
Testcase: igt/gem_exec_schedule
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/i915_drv.c | 1 +
drivers/gpu/drm/i915/i915_gem_context.c | 23 +++++++++++++++++++++++
drivers/gpu/drm/i915/i915_gem_request.h | 14 ++++++++++----
include/uapi/drm/i915_drm.h | 7 +++++++
4 files changed, 41 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index a3bb352a581b..147d9fa91d1b 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -370,6 +370,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
value = 0;
if (dev_priv->engine[RCS] && dev_priv->engine[RCS]->schedule) {
value |= I915_SCHEDULER_CAP_ENABLED;
+ value |= I915_SCHEDULER_CAP_PRIORITY;
if (INTEL_INFO(dev_priv)->has_logical_ring_preemption &&
i915_modparams.enable_execlists &&
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index f299b0a7c765..a2ed5fbef351 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1070,6 +1070,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
case I915_CONTEXT_PARAM_BANNABLE:
args->value = i915_gem_context_is_bannable(ctx);
break;
+ case I915_CONTEXT_PARAM_PRIORITY:
+ args->value = ctx->priority;
+ break;
default:
ret = -EINVAL;
break;
@@ -1125,6 +1128,26 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
else
i915_gem_context_clear_bannable(ctx);
break;
+
+ case I915_CONTEXT_PARAM_PRIORITY:
+ {
+ int priority = args->value;
+
+ if (args->size)
+ ret = -EINVAL;
+ else if (!to_i915(dev)->engine[RCS]->schedule)
+ ret = -ENODEV;
+ else if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
+ priority < I915_CONTEXT_MIN_USER_PRIORITY)
+ ret = -EINVAL;
+ else if (priority > I915_CONTEXT_DEFAULT_PRIORITY &&
+ !capable(CAP_SYS_NICE))
+ ret = -EPERM;
+ else
+ ctx->priority = priority;
+ }
+ break;
+
default:
ret = -EINVAL;
break;
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 6b9e992d01de..26249f39de67 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -30,6 +30,8 @@
#include "i915_gem.h"
#include "i915_sw_fence.h"
+#include <uapi/drm/i915_drm.h>
+
struct drm_file;
struct drm_i915_gem_object;
struct drm_i915_gem_request;
@@ -69,10 +71,14 @@ struct i915_priotree {
struct list_head waiters_list; /* those after us, they depend upon us */
struct list_head link;
int priority;
-#define I915_PRIORITY_MAX 1024
-#define I915_PRIORITY_NORMAL 0
-#define I915_PRIORITY_MIN (-I915_PRIORITY_MAX)
-#define I915_PRIORITY_INVALID INT_MIN
+};
+
+enum {
+ I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1,
+ I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY,
+ I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1,
+
+ I915_PRIORITY_INVALID = INT_MIN
};
struct i915_gem_capture_list {
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index aa4a3b20ef6b..7266b53191ee 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -402,6 +402,9 @@ typedef struct drm_i915_irq_wait {
* priorities and the driver will attempt to execute batches in priority order.
* The param returns a capability bitmask, nonzero implies that the scheduler
* is enabled, with different features present according to the mask.
+ *
+ * The initial priority for each batch is supplied by the context and is
+ * controlled via I915_CONTEXT_PARAM_PRIORITY.
*/
#define I915_PARAM_HAS_SCHEDULER 41
#define I915_SCHEDULER_CAP_ENABLED (1ul << 0)
@@ -1367,6 +1370,10 @@ struct drm_i915_gem_context_param {
#define I915_CONTEXT_PARAM_GTT_SIZE 0x3
#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4
#define I915_CONTEXT_PARAM_BANNABLE 0x5
+#define I915_CONTEXT_PARAM_PRIORITY 0x6
+#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */
+#define I915_CONTEXT_DEFAULT_PRIORITY 0
+#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */
__u64 value;
};
--
2.14.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 30+ messages in thread* Re: [PATCH v3 13/13] drm/i915/scheduler: Support user-defined priorities
2017-09-28 19:39 ` [PATCH v3 13/13] drm/i915/scheduler: Support user-defined priorities Chris Wilson
@ 2017-09-29 7:32 ` Joonas Lahtinen
0 siblings, 0 replies; 30+ messages in thread
From: Joonas Lahtinen @ 2017-09-29 7:32 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
On Thu, 2017-09-28 at 20:39 +0100, Chris Wilson wrote:
> Use a priority stored in the context as the initial value when
> submitting a request. This allows us to change the default priority on a
> per-context basis, allowing different contexts to be favoured with GPU
> time at the expense of lower importance work. The user can adjust the
> context's priority via I915_CONTEXT_PARAM_PRIORITY, with more positive
> values being higher priority (they will be serviced earlier, after their
> dependencies have been resolved). Any prerequisite work for an execbuf
> will have its priority raised to match the new request as required.
>
> Normal users can specify any value in the range of -1023 to 0 [default],
> i.e. they can reduce the priority of their workloads (and temporarily
> boost it back to normal if so desired).
>
> Privileged users can specify any value in the range of -1023 to 1023,
> [default is 0], i.e. they can raise their priority above all others and
> so potentially starve the system.
>
> Note that the existing schedulers are not fair, nor load balancing, the
> execution is strictly by priority on a first-come, first-served basis,
> and the driver may choose to boost some requests above the range
> available to users.
>
> This priority was originally based around nice(2), but evolved to allow
> clients to adjust their priority within a small range, and allow for a
> privileged high priority range.
>
> For example, this can be used to implement EGL_IMG_context_priority
> https://www.khronos.org/registry/egl/extensions/IMG/EGL_IMG_context_priority.txt
>
> EGL_CONTEXT_PRIORITY_LEVEL_IMG determines the priority level of
> the context to be created. This attribute is a hint, as an
> implementation may not support multiple contexts at some
> priority levels and system policy may limit access to high
> priority contexts to appropriate system privilege level. The
> default value for EGL_CONTEXT_PRIORITY_LEVEL_IMG is
> EGL_CONTEXT_PRIORITY_MEDIUM_IMG."
>
> so we can map
>
> PRIORITY_HIGH -> 1023 [privileged, will fall back to 0]
> PRIORITY_MED -> 0 [default]
> PRIORITY_LOW -> -1023
>
> They also map onto the priorities used by VkQueue (and a VkQueue is
> essentially a timeline, our i915_gem_context under full-ppgtt).
>
> v2: s/CAP_SYS_ADMIN/CAP_SYS_NICE/
> v3: Report min/max user priorities as defines in the uapi, and rebase
> internal priorities on the exposed values.
>
> Testcase: igt/gem_exec_schedule
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Didn't seem to stick last time :P
Regards, Joonas
--
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v3 01/13] drm/i915: Inherit Kabylake platform features from Skylake
2017-09-28 19:38 [PATCH v3 01/13] drm/i915: Inherit Kabylake platform features from Skylake Chris Wilson
` (11 preceding siblings ...)
2017-09-28 19:39 ` [PATCH v3 13/13] drm/i915/scheduler: Support user-defined priorities Chris Wilson
@ 2017-09-28 19:58 ` Rodrigo Vivi
2017-09-28 20:05 ` Chris Wilson
2017-09-28 21:04 ` ✗ Fi.CI.BAT: failure for series starting with [v3,01/13] " Patchwork
` (3 subsequent siblings)
16 siblings, 1 reply; 30+ messages in thread
From: Rodrigo Vivi @ 2017-09-28 19:58 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx, Paulo Zanoni
On Thu, Sep 28, 2017 at 07:38:58PM +0000, Chris Wilson wrote:
> I recently tried to update the gen9 feature matrix and to my unpleasant
> surprise found that Kabylake still acted like Broadwell and didn't
> enable the feature. This is because kbl/cfl are inheriting their
> defaults from Broadwell and not Skylake.
Oh, if this is blocking all this series here we can move with this
and I do the re-org on top of this later since that one depends on
that has_ipc discussion...
only one nit-pick below...
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_pci.c | 21 +++++----------------
> 1 file changed, 5 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index da60866b6628..01d4b569b2cc 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -495,13 +495,9 @@ static const struct intel_device_info intel_geminilake_info __initconst = {
> };
>
> #define KBL_PLATFORM \
> - BDW_FEATURES, \
> - .gen = 9, \
> + SKL_PLATFORM, \
> .platform = INTEL_KABYLAKE, \
> - .has_csr = 1, \
> - .has_guc = 1, \
> - .has_ipc = 1, \
> - .ddb_size = 896
> + .has_ipc = 1
>
> static const struct intel_device_info intel_kabylake_gt1_info __initconst = {
> KBL_PLATFORM,
> @@ -520,13 +516,8 @@ static const struct intel_device_info intel_kabylake_gt3_info __initconst = {
> };
>
> #define CFL_PLATFORM \
> - BDW_FEATURES, \
> - .gen = 9, \
> - .platform = INTEL_COFFEELAKE, \
> - .has_csr = 1, \
> - .has_guc = 1, \
> - .has_ipc = 1, \
> - .ddb_size = 896
> + KBL_PLATFORM, \
> + .platform = INTEL_COFFEELAKE
>
> static const struct intel_device_info intel_coffeelake_gt1_info __initconst = {
> CFL_PLATFORM,
> @@ -545,14 +536,12 @@ static const struct intel_device_info intel_coffeelake_gt3_info __initconst = {
> };
>
> static const struct intel_device_info intel_cannonlake_gt2_info __initconst = {
> - BDW_FEATURES,
> + SKL_PLATFORM,
KBL_PLATFORM or we end up removing has_ipc from cnl...
(CFL_PLATFORM also works)
> .is_alpha_support = 1,
> .platform = INTEL_CANNONLAKE,
> .gen = 10,
> .gt = 2,
> .ddb_size = 1024,
> - .has_csr = 1,
> - .has_ipc = 1,
> .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 }
> };
>
> --
> 2.14.2
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 30+ messages in thread* ✗ Fi.CI.BAT: failure for series starting with [v3,01/13] drm/i915: Inherit Kabylake platform features from Skylake
2017-09-28 19:38 [PATCH v3 01/13] drm/i915: Inherit Kabylake platform features from Skylake Chris Wilson
` (12 preceding siblings ...)
2017-09-28 19:58 ` [PATCH v3 01/13] drm/i915: Inherit Kabylake platform features from Skylake Rodrigo Vivi
@ 2017-09-28 21:04 ` Patchwork
2017-09-29 10:29 ` ✓ Fi.CI.BAT: success " Patchwork
` (2 subsequent siblings)
16 siblings, 0 replies; 30+ messages in thread
From: Patchwork @ 2017-09-28 21:04 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [v3,01/13] drm/i915: Inherit Kabylake platform features from Skylake
URL : https://patchwork.freedesktop.org/series/31092/
State : failure
== Summary ==
Series 31092v1 series starting with [v3,01/13] drm/i915: Inherit Kabylake platform features from Skylake
https://patchwork.freedesktop.org/api/1.0/series/31092/revisions/1/mbox/
Test chamelium:
Subgroup dp-crc-fast:
fail -> PASS (fi-kbl-7500u) fdo#102514
Test gem_exec_suspend:
Subgroup basic-s3:
pass -> DMESG-FAIL (fi-kbl-7560u)
Subgroup basic-s4-devices:
pass -> DMESG-FAIL (fi-kbl-7560u)
Test gem_flink_basic:
Subgroup bad-flink:
pass -> DMESG-WARN (fi-kbl-7560u)
Subgroup bad-open:
pass -> DMESG-WARN (fi-kbl-7560u)
Subgroup basic:
pass -> DMESG-WARN (fi-kbl-7560u)
Subgroup double-flink:
pass -> DMESG-WARN (fi-kbl-7560u)
Subgroup flink-lifetime:
pass -> DMESG-WARN (fi-kbl-7560u)
Test gem_linear_blits:
Subgroup basic:
pass -> INCOMPLETE (fi-kbl-7560u)
Test drv_module_reload:
Subgroup basic-no-display:
dmesg-warn -> PASS (fi-glk-1) fdo#102777
fdo#102514 https://bugs.freedesktop.org/show_bug.cgi?id=102514
fdo#102777 https://bugs.freedesktop.org/show_bug.cgi?id=102777
fi-bdw-5557u total:289 pass:268 dwarn:0 dfail:0 fail:0 skip:21 time:443s
fi-bdw-gvtdvm total:289 pass:265 dwarn:0 dfail:0 fail:0 skip:24 time:483s
fi-blb-e6850 total:289 pass:224 dwarn:1 dfail:0 fail:0 skip:64 time:418s
fi-bsw-n3050 total:289 pass:243 dwarn:0 dfail:0 fail:0 skip:46 time:513s
fi-bwr-2160 total:289 pass:184 dwarn:0 dfail:0 fail:0 skip:105 time:278s
fi-bxt-dsi total:289 pass:259 dwarn:0 dfail:0 fail:0 skip:30 time:499s
fi-bxt-j4205 total:289 pass:260 dwarn:0 dfail:0 fail:0 skip:29 time:503s
fi-byt-j1900 total:289 pass:254 dwarn:1 dfail:0 fail:0 skip:34 time:492s
fi-byt-n2820 total:289 pass:250 dwarn:1 dfail:0 fail:0 skip:38 time:490s
fi-cfl-s total:289 pass:256 dwarn:1 dfail:0 fail:0 skip:32 time:554s
fi-cnl-y total:289 pass:260 dwarn:1 dfail:0 fail:1 skip:27 time:635s
fi-elk-e7500 total:289 pass:230 dwarn:0 dfail:0 fail:0 skip:59 time:413s
fi-glk-1 total:289 pass:260 dwarn:0 dfail:0 fail:0 skip:29 time:567s
fi-hsw-4770 total:289 pass:263 dwarn:0 dfail:0 fail:0 skip:26 time:426s
fi-hsw-4770r total:289 pass:263 dwarn:0 dfail:0 fail:0 skip:26 time:402s
fi-ilk-650 total:289 pass:229 dwarn:0 dfail:0 fail:0 skip:60 time:429s
fi-ivb-3520m total:289 pass:261 dwarn:0 dfail:0 fail:0 skip:28 time:491s
fi-ivb-3770 total:289 pass:261 dwarn:0 dfail:0 fail:0 skip:28 time:468s
fi-kbl-7500u total:289 pass:264 dwarn:1 dfail:0 fail:0 skip:24 time:473s
fi-kbl-7560u total:125 pass:105 dwarn:5 dfail:2 fail:0 skip:12
fi-kbl-r total:289 pass:262 dwarn:0 dfail:0 fail:0 skip:27 time:592s
fi-pnv-d510 total:289 pass:223 dwarn:1 dfail:0 fail:0 skip:65 time:547s
fi-skl-6260u total:289 pass:269 dwarn:0 dfail:0 fail:0 skip:20 time:459s
fi-skl-6700k total:289 pass:265 dwarn:0 dfail:0 fail:0 skip:24 time:750s
fi-skl-6770hq total:289 pass:269 dwarn:0 dfail:0 fail:0 skip:20 time:492s
fi-skl-gvtdvm total:289 pass:266 dwarn:0 dfail:0 fail:0 skip:23 time:480s
fi-snb-2520m total:289 pass:251 dwarn:0 dfail:0 fail:0 skip:38 time:566s
fi-snb-2600 total:289 pass:250 dwarn:0 dfail:0 fail:0 skip:39 time:414s
0369ecdbb55495dd4671ad33d375f37d0707b629 drm-tip: 2017y-09m-28d-20h-01m-55s UTC integration manifest
3e277553d4d8 drm/i915/scheduler: Support user-defined priorities
a5030d913ab3 drm/i915/execlists: Preemption!
63ffd55487c7 drm/i915: Expand I915_PARAM_HAS_SCHEDULER into a capability bitmask
a31c4b4f571f drm/i915/execlists: Keep request->priority for its lifetime
82888bf7da72 drm/i915/execlists: Move bdw GPGPU w/a to emit_bb
7de95eec2d8f drm/i915: Introduce a preempt context
ae3190509e53 drm/i915/execlists: Distinguish the incomplete context notifies
4709aab0ba2c drm/i915/preempt: Default to disabled mid-command preemption levels
d7167e0c76f1 drm/i915/preempt: Fix WaEnablePreemptionGranularityControlByUMD
6bc58077f28c drm/i915/execlists: Cache the last priolist lookup
a1ecbabad013 drm/i915: Give the invalid priority a magic name
cd27e7c63660 drm/i915/execlists: Move request unwinding to a separate function
a08eebaf7b68 drm/i915: Inherit Kabylake platform features from Skylake
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_5851/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 30+ messages in thread* ✓ Fi.CI.BAT: success for series starting with [v3,01/13] drm/i915: Inherit Kabylake platform features from Skylake
2017-09-28 19:38 [PATCH v3 01/13] drm/i915: Inherit Kabylake platform features from Skylake Chris Wilson
` (13 preceding siblings ...)
2017-09-28 21:04 ` ✗ Fi.CI.BAT: failure for series starting with [v3,01/13] " Patchwork
@ 2017-09-29 10:29 ` Patchwork
2017-09-29 11:20 ` [PATCH v3 01/13] " David Weinehall
2017-09-29 11:24 ` ✗ Fi.CI.IGT: failure for series starting with [v3,01/13] " Patchwork
16 siblings, 0 replies; 30+ messages in thread
From: Patchwork @ 2017-09-29 10:29 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [v3,01/13] drm/i915: Inherit Kabylake platform features from Skylake
URL : https://patchwork.freedesktop.org/series/31092/
State : success
== Summary ==
Series 31092v1 series starting with [v3,01/13] drm/i915: Inherit Kabylake platform features from Skylake
https://patchwork.freedesktop.org/api/1.0/series/31092/revisions/1/mbox/
Test drv_module_reload:
Subgroup basic-reload:
pass -> DMESG-WARN (fi-glk-1) fdo#102777 +1
fdo#102777 https://bugs.freedesktop.org/show_bug.cgi?id=102777
fi-bdw-5557u total:289 pass:268 dwarn:0 dfail:0 fail:0 skip:21 time:453s
fi-bdw-gvtdvm total:289 pass:265 dwarn:0 dfail:0 fail:0 skip:24 time:467s
fi-blb-e6850 total:289 pass:224 dwarn:1 dfail:0 fail:0 skip:64 time:418s
fi-bsw-n3050 total:289 pass:243 dwarn:0 dfail:0 fail:0 skip:46 time:525s
fi-bwr-2160 total:289 pass:184 dwarn:0 dfail:0 fail:0 skip:105 time:278s
fi-bxt-dsi total:289 pass:259 dwarn:0 dfail:0 fail:0 skip:30 time:502s
fi-bxt-j4205 total:289 pass:260 dwarn:0 dfail:0 fail:0 skip:29 time:520s
fi-byt-j1900 total:289 pass:254 dwarn:1 dfail:0 fail:0 skip:34 time:508s
fi-byt-n2820 total:289 pass:250 dwarn:1 dfail:0 fail:0 skip:38 time:495s
fi-cfl-s total:289 pass:256 dwarn:1 dfail:0 fail:0 skip:32 time:554s
fi-cnl-y total:289 pass:261 dwarn:1 dfail:0 fail:0 skip:27 time:660s
fi-elk-e7500 total:289 pass:230 dwarn:0 dfail:0 fail:0 skip:59 time:422s
fi-glk-1 total:289 pass:258 dwarn:2 dfail:0 fail:0 skip:29 time:570s
fi-hsw-4770 total:289 pass:263 dwarn:0 dfail:0 fail:0 skip:26 time:422s
fi-hsw-4770r total:289 pass:263 dwarn:0 dfail:0 fail:0 skip:26 time:409s
fi-ilk-650 total:289 pass:229 dwarn:0 dfail:0 fail:0 skip:60 time:443s
fi-ivb-3520m total:289 pass:261 dwarn:0 dfail:0 fail:0 skip:28 time:491s
fi-ivb-3770 total:289 pass:261 dwarn:0 dfail:0 fail:0 skip:28 time:464s
fi-kbl-7500u total:289 pass:264 dwarn:1 dfail:0 fail:0 skip:24 time:477s
fi-kbl-7560u total:289 pass:270 dwarn:0 dfail:0 fail:0 skip:19 time:580s
fi-kbl-r total:289 pass:262 dwarn:0 dfail:0 fail:0 skip:27 time:597s
fi-pnv-d510 total:289 pass:223 dwarn:1 dfail:0 fail:0 skip:65 time:542s
fi-skl-6260u total:289 pass:269 dwarn:0 dfail:0 fail:0 skip:20 time:453s
fi-skl-6700k total:289 pass:265 dwarn:0 dfail:0 fail:0 skip:24 time:757s
fi-skl-6770hq total:289 pass:269 dwarn:0 dfail:0 fail:0 skip:20 time:494s
fi-skl-gvtdvm total:289 pass:266 dwarn:0 dfail:0 fail:0 skip:23 time:479s
fi-snb-2520m total:289 pass:251 dwarn:0 dfail:0 fail:0 skip:38 time:576s
fi-snb-2600 total:289 pass:250 dwarn:0 dfail:0 fail:0 skip:39 time:419s
2131a7ba0b2b67aa2458f0607f26eab1b5aff119 drm-tip: 2017y-09m-29d-08h-01m-09s UTC integration manifest
5800229fd4b3 drm/i915/scheduler: Support user-defined priorities
8c2debba6f87 drm/i915/execlists: Preemption!
2825d015bead drm/i915: Expand I915_PARAM_HAS_SCHEDULER into a capability bitmask
a32885b88daa drm/i915/execlists: Keep request->priority for its lifetime
56880358b03b drm/i915/execlists: Move bdw GPGPU w/a to emit_bb
4f8bb5f250f2 drm/i915: Introduce a preempt context
3ea69de0dc41 drm/i915/execlists: Distinguish the incomplete context notifies
7c6a2373ddb6 drm/i915/preempt: Default to disabled mid-command preemption levels
df49a1f8e8c9 drm/i915/preempt: Fix WaEnablePreemptionGranularityControlByUMD
de884acabdf2 drm/i915/execlists: Cache the last priolist lookup
544f0d35fa05 drm/i915: Give the invalid priority a magic name
338c3ef2a0ee drm/i915/execlists: Move request unwinding to a separate function
70974766aae0 drm/i915: Inherit Kabylake platform features from Skylake
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_5858/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH v3 01/13] drm/i915: Inherit Kabylake platform features from Skylake
2017-09-28 19:38 [PATCH v3 01/13] drm/i915: Inherit Kabylake platform features from Skylake Chris Wilson
` (14 preceding siblings ...)
2017-09-29 10:29 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2017-09-29 11:20 ` David Weinehall
2017-09-29 11:24 ` ✗ Fi.CI.IGT: failure for series starting with [v3,01/13] " Patchwork
16 siblings, 0 replies; 30+ messages in thread
From: David Weinehall @ 2017-09-29 11:20 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx, Paulo Zanoni, Rodrigo Vivi
On Thu, Sep 28, 2017 at 08:38:58PM +0100, Chris Wilson wrote:
> I recently tried to update the gen9 feature matrix and to my unpleasant
> surprise found that Kabylake still acted like Broadwell and didn't
> enable the feature. This is because kbl/cfl are inheriting their
> defaults from Broadwell and not Skylake.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_pci.c | 21 +++++----------------
> 1 file changed, 5 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index da60866b6628..01d4b569b2cc 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -495,13 +495,9 @@ static const struct intel_device_info intel_geminilake_info __initconst = {
> };
>
> #define KBL_PLATFORM \
> - BDW_FEATURES, \
> - .gen = 9, \
> + SKL_PLATFORM, \
> .platform = INTEL_KABYLAKE, \
> - .has_csr = 1, \
> - .has_guc = 1, \
> - .has_ipc = 1, \
> - .ddb_size = 896
> + .has_ipc = 1
This seems correct.
> static const struct intel_device_info intel_kabylake_gt1_info __initconst = {
> KBL_PLATFORM,
> @@ -520,13 +516,8 @@ static const struct intel_device_info intel_kabylake_gt3_info __initconst = {
> };
>
> #define CFL_PLATFORM \
> - BDW_FEATURES, \
> - .gen = 9, \
> - .platform = INTEL_COFFEELAKE, \
> - .has_csr = 1, \
> - .has_guc = 1, \
> - .has_ipc = 1, \
> - .ddb_size = 896
> + KBL_PLATFORM, \
> + .platform = INTEL_COFFEELAKE
As does this.
> static const struct intel_device_info intel_coffeelake_gt1_info __initconst = {
> CFL_PLATFORM,
> @@ -545,14 +536,12 @@ static const struct intel_device_info intel_coffeelake_gt3_info __initconst = {
> };
>
> static const struct intel_device_info intel_cannonlake_gt2_info __initconst = {
> - BDW_FEATURES,
> + SKL_PLATFORM,
> .is_alpha_support = 1,
> .platform = INTEL_CANNONLAKE,
> .gen = 10,
> .gt = 2,
> .ddb_size = 1024,
> - .has_csr = 1,
> - .has_ipc = 1,
> .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 }
> };
But not this. Since you're inheriting SKL_PLATFORM rather than
KBL_PLATFORM you're missing out on .has_ipc = 1.
Kind regards, David
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 30+ messages in thread* ✗ Fi.CI.IGT: failure for series starting with [v3,01/13] drm/i915: Inherit Kabylake platform features from Skylake
2017-09-28 19:38 [PATCH v3 01/13] drm/i915: Inherit Kabylake platform features from Skylake Chris Wilson
` (15 preceding siblings ...)
2017-09-29 11:20 ` [PATCH v3 01/13] " David Weinehall
@ 2017-09-29 11:24 ` Patchwork
16 siblings, 0 replies; 30+ messages in thread
From: Patchwork @ 2017-09-29 11:24 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [v3,01/13] drm/i915: Inherit Kabylake platform features from Skylake
URL : https://patchwork.freedesktop.org/series/31092/
State : failure
== Summary ==
Test gem_ctx_param:
Subgroup invalid-param-get:
pass -> FAIL (shard-hsw)
Subgroup invalid-param-set:
pass -> FAIL (shard-hsw)
Test kms_atomic_transition:
Subgroup plane-all-transition-nonblocking:
pass -> FAIL (shard-hsw) fdo#102671
Test kms_flip:
Subgroup wf_vblank-vs-modeset:
dmesg-warn -> PASS (shard-hsw) fdo#102614
Subgroup basic-flip-vs-modeset:
pass -> DMESG-WARN (shard-hsw)
fdo#102671 https://bugs.freedesktop.org/show_bug.cgi?id=102671
fdo#102614 https://bugs.freedesktop.org/show_bug.cgi?id=102614
shard-hsw total:2429 pass:1324 dwarn:8 dfail:0 fail:14 skip:1083 time:9839s
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_5858/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 30+ messages in thread