public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tursulin@ursulin.net>
To: Intel-gfx@lists.freedesktop.org
Subject: [RFC 10/10] drm/i915: Naive engine busyness based load balancing
Date: Thu, 25 Jan 2018 13:33:33 +0000	[thread overview]
Message-ID: <20180125133333.13425-11-tvrtko.ursulin@linux.intel.com> (raw)
In-Reply-To: <20180125133333.13425-1-tvrtko.ursulin@linux.intel.com>

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

At execbuf time engine busyness since the last submission is used as basis
for determining where to submit. In case both engines are equally busy,
request is submitted to the same engine as the previous one.

Virtual engine contexts enable engine busy stats on first submission and
disable it at context destruction.

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
This is not a proposal on how to actually implement this, since it
performs very poorly compared to similar userspace balancing strategies.
It should be considered just a prototype to illustrate the idea and
issues with frontend balancing.
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c    |  3 ++
 drivers/gpu/drm/i915/i915_gem_context.h    | 19 +++++++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 62 +++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_engine_cs.c     | 35 +++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  2 +
 5 files changed, 119 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 8f5f23b0dd34..a324e24f7a07 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -129,6 +129,9 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	for (i = 0; i < I915_NUM_ENGINES; i++) {
 		struct intel_context *ce = &ctx->engine[i];
 
+		if (ctx->stats_enabled[i])
+			intel_disable_engine_stats(ctx->i915->engine[i]);
+
 		if (!ce->state)
 			continue;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 5afe050718b8..0113c161f245 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -163,6 +163,25 @@ struct i915_gem_context {
 		int pin_count;
 	} engine[I915_NUM_ENGINES];
 
+	/**
+	 * @stats_enabled: Has this context enabled per-engine stats.
+	 *
+	 * Boolean tracked per-engine.
+	 */
+	bool stats_enabled[I915_NUM_ENGINES];
+
+	/**
+	 * @prev_busy: Previous engine busyness.
+	 *
+	 * For VCS engines.
+	 */
+	u64 prev_busy[2];
+
+	/**
+	 * @prev_instance: VCS instance the previous request was submitted to.
+	 */
+	u8 prev_instance;
+
 	/** ring_size: size for allocating the per-engine ring buffer */
 	u32 ring_size;
 	/** desc_template: invariant fields for the HW context descriptor */
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index f89a7be68133..53afc4dc976e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2018,6 +2018,53 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
 	return file_priv->bsd_engine;
 }
 
+static void
+ctx_enable_stats(struct i915_gem_context *ctx, enum intel_engine_id id)
+{
+	int ret;
+
+	ret = intel_enable_engine_stats(ctx->i915->engine[id]);
+
+	ctx->stats_enabled[id] = ret == 0;
+}
+
+static u8 eb_rr_instance(struct drm_i915_private *i915)
+{
+	return atomic_fetch_xor(1, &i915->mm.bsd_engine_dispatch_index);
+}
+
+static u8 ctx_best_vcs_instance(struct i915_gem_context *ctx)
+{
+	ktime_t now = ktime_get();
+	u64 busy[2], prev_busy[2];
+	u8 instance;
+
+	busy[0] = intel_engine_get_busy_time_now(ctx->i915->engine[_VCS(0)],
+						 now);
+	busy[1] = intel_engine_get_busy_time_now(ctx->i915->engine[_VCS(1)],
+						 now);
+
+	prev_busy[0] = ctx->prev_busy[0];
+	prev_busy[1] = ctx->prev_busy[1];
+
+	ctx->prev_busy[0] = busy[0];
+	ctx->prev_busy[1] = busy[1];
+
+	busy[0] -= prev_busy[0];
+	busy[1] -= prev_busy[1];
+
+	if (busy[0] < busy[1])
+		instance = 0;
+	else if (busy[1] < busy[0])
+		instance = 1;
+	else
+		instance = ctx->prev_instance;
+
+	ctx->prev_instance = instance;
+
+	return instance;
+}
+
 static int eb_select_engine_class_instance(struct i915_execbuffer *eb)
 {
 	struct drm_i915_private *i915 = eb->i915;
@@ -2037,8 +2084,19 @@ static int eb_select_engine_class_instance(struct i915_execbuffer *eb)
 		unsigned int vcs_instances = 2;
 		struct intel_timeline *timeline;
 
-		instance = atomic_fetch_xor(1,
-					    &i915->mm.bsd_engine_dispatch_index);
+		if (intel_engine_supports_stats(i915->engine[VCS])) {
+			if (!eb->ctx->stats_enabled[_VCS(0)])
+				ctx_enable_stats(eb->ctx, _VCS(0));
+
+			if (!eb->ctx->stats_enabled[_VCS(1)])
+				ctx_enable_stats(eb->ctx, _VCS(1));
+		}
+
+		if (eb->ctx->stats_enabled[_VCS(0)] &&
+		    eb->ctx->stats_enabled[_VCS(1)])
+			instance = ctx_best_vcs_instance(eb->ctx);
+		else
+			instance = eb_rr_instance(i915);
 
 		do {
 			engine = i915->engine[_VCS(instance)];
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index e02627618bc5..d186a218809f 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -2008,6 +2008,22 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
 	return total;
 }
 
+static ktime_t
+___intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t now)
+{
+	ktime_t total = engine->stats.total;
+
+	/*
+	 * If the engine is executing something at the moment
+	 * add it to the total.
+	 */
+	if (engine->stats.active)
+		total = ktime_add(total,
+				  ktime_sub(now, engine->stats.start));
+
+	return total;
+}
+
 /**
  * intel_engine_get_busy_time() - Return current accumulated engine busyness
  * @engine: engine to report on
@@ -2026,6 +2042,25 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
 	return total;
 }
 
+/**
+ * intel_engine_get_busy_time_now() - Return accumulated engine busyness at @now
+ * @engine: engine to report on
+ * @now: timestamp (from ktime_get()) at which to evaluate busyness
+ * Returns accumulated time @engine was busy since engine stats were enabled.
+ */
+ktime_t
+intel_engine_get_busy_time_now(struct intel_engine_cs *engine, ktime_t now)
+{
+	ktime_t total;
+	unsigned long flags;
+
+	spin_lock_irqsave(&engine->stats.lock, flags);
+	total = ___intel_engine_get_busy_time(engine, now);
+	spin_unlock_irqrestore(&engine->stats.lock, flags);
+
+	return total;
+}
+
 /**
  * intel_disable_engine_stats() - Disable engine busy tracking on engine
  * @engine: engine to disable stats collection
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 76f7fdc926ae..6f418b37ed58 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -1102,5 +1102,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine);
 void intel_disable_engine_stats(struct intel_engine_cs *engine);
 
 ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
+ktime_t intel_engine_get_busy_time_now(struct intel_engine_cs *engine,
+				       ktime_t now);
 
 #endif /* _INTEL_RINGBUFFER_H_ */
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2018-01-25 13:33 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-25 13:33 [RFC 00/10] Virtual queue/engine uAPI prototype Tvrtko Ursulin
2018-01-25 13:33 ` [RFC 01/10] move-timeline-to-ctx Tvrtko Ursulin
2018-01-25 13:33 ` [RFC 02/10] drm/i915: Extend CREATE_CONTEXT to allow inheritance ala clone() Tvrtko Ursulin
2018-01-25 13:33 ` [RFC 03/10] drm/i915: Select engines via class and instance in execbuffer2 Tvrtko Ursulin
2018-01-25 13:33 ` [RFC 04/10] drm/i915: Engine capabilities uAPI Tvrtko Ursulin
2018-01-25 13:33 ` [RFC 05/10] drm/i915: Re-arrange execbuf so context is known before engine Tvrtko Ursulin
2018-01-25 13:33 ` [RFC 06/10] drm/i915: Refactor eb_select_engine to take eb Tvrtko Ursulin
2018-01-25 13:33 ` [RFC 07/10] drm/i915: Track latest request per engine class Tvrtko Ursulin
2018-01-25 13:33 ` [RFC 08/10] drm/i915: Allow creating virtual contexts Tvrtko Ursulin
2018-01-25 13:33 ` [RFC 09/10] drm/i915: Trivial virtual engine implementation Tvrtko Ursulin
2018-01-25 13:57   ` Chris Wilson
2018-01-25 14:26     ` Tvrtko Ursulin
2018-01-25 14:32       ` Chris Wilson
2018-01-25 14:36         ` Tvrtko Ursulin
2018-01-25 13:33 ` Tvrtko Ursulin [this message]
2018-01-25 14:12 ` ✗ Fi.CI.BAT: failure for Virtual queue/engine uAPI prototype Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180125133333.13425-11-tvrtko.ursulin@linux.intel.com \
    --to=tursulin@ursulin.net \
    --cc=Intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox