public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tursulin@ursulin.net>
To: Intel-gfx@lists.freedesktop.org
Subject: [PATCH v7 8/8] drm/i915: Gate engine stats collection with a static key
Date: Tue, 19 Sep 2017 09:43:21 +0100	[thread overview]
Message-ID: <20170919084321.15656-1-tvrtko.ursulin@linux.intel.com> (raw)
In-Reply-To: <1505752565.8257.1.camel@intel.com>

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

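This reduces the cost of the software engine busyness tracking
to a single no-op instruction when there are no listeners.

Enabling and disabling the stats now takes a mutex and flips the
static key, both of which can sleep, so these actions are moved out
of the perf API callbacks and into work items. The disable is
additionally delayed by about a second so that rapid enable/disable
cycles, as seen when tools like perf stat start up, do not keep
toggling the key.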

v2: Rebase and some comments.
v3: Rebase.
v4: Checkpatch fixes.
v5: Rebase.
v6: Use system_long_wq to avoid being blocked by struct_mutex
    users.
v7: Fix bad conflict resolution from last rebase. (Dmitry Rogozhkin)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_pmu.c         |  54 +++++++++++++++--
 drivers/gpu/drm/i915/intel_engine_cs.c  |  17 ++++++
 drivers/gpu/drm/i915/intel_ringbuffer.h | 101 ++++++++++++++++++++------------
 3 files changed, 130 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index ffba21eeb5d0..6d22172b8fb0 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -499,11 +499,17 @@ static void i915_pmu_enable(struct perf_event *event)
 		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
 		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
 		if (engine->pmu.enable_count[sample]++ == 0) {
+			/*
+			 * Enable engine busy stats tracking if needed or
+			 * alternatively cancel the scheduled disabling of the
+			 * same.
+			 */
 			if (engine_needs_busy_stats(engine) &&
 			    !engine->pmu.busy_stats) {
-				engine->pmu.busy_stats =
-					intel_enable_engine_stats(engine) == 0;
-				WARN_ON_ONCE(!engine->pmu.busy_stats);
+				engine->pmu.busy_stats = true;
+				if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
+					queue_work(system_long_wq,
+						   &engine->pmu.enable_busy_stats);
 			}
 		}
 	}
@@ -546,7 +552,15 @@ static void i915_pmu_disable(struct perf_event *event)
 			if (!engine_needs_busy_stats(engine) &&
 			    engine->pmu.busy_stats) {
 				engine->pmu.busy_stats = false;
-				intel_disable_engine_stats(engine);
+				/*
+				 * We request a delayed disable so that rapid
+				 * on/off cycles on events, which can happen
+				 * when tools like perf stat start, are handled
+				 * in a nicer way.
+				 */
+				queue_delayed_work(system_long_wq,
+						   &engine->pmu.disable_busy_stats,
+						   round_jiffies_up_relative(HZ));
 			}
 		}
 	}
@@ -737,9 +751,27 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
 	return 0;
 }
 
+static void __enable_busy_stats(struct work_struct *work)
+{
+	struct intel_engine_cs *engine =
+		container_of(work, typeof(*engine), pmu.enable_busy_stats);
+
+	WARN_ON_ONCE(intel_enable_engine_stats(engine));
+}
+
+static void __disable_busy_stats(struct work_struct *work)
+{
+	struct intel_engine_cs *engine =
+	       container_of(work, typeof(*engine), pmu.disable_busy_stats.work);
+
+	intel_disable_engine_stats(engine);
+}
+
 void i915_pmu_register(struct drm_i915_private *i915)
 {
 	int ret;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 
 	if (INTEL_GEN(i915) <= 2) {
 		DRM_INFO("PMU not supported for this GPU.");
@@ -773,6 +805,12 @@ void i915_pmu_register(struct drm_i915_private *i915)
 	i915->pmu.timer.function = i915_sample;
 	i915->pmu.enable = 0;
 
+	for_each_engine(engine, i915, id) {
+		INIT_WORK(&engine->pmu.enable_busy_stats, __enable_busy_stats);
+		INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats,
+				  __disable_busy_stats);
+	}
+
 	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
 	if (ret == 0)
 		return;
@@ -791,6 +829,9 @@ void i915_pmu_register(struct drm_i915_private *i915)
 
 void i915_pmu_unregister(struct drm_i915_private *i915)
 {
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
 	if (!i915->pmu.base.event_init)
 		return;
 
@@ -802,6 +843,11 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
 
 	hrtimer_cancel(&i915->pmu.timer);
 
+	for_each_engine(engine, i915, id) {
+		flush_work(&engine->pmu.enable_busy_stats);
+		flush_delayed_work(&engine->pmu.disable_busy_stats);
+	}
+
 	perf_pmu_unregister(&i915->pmu.base);
 	i915->pmu.base.event_init = NULL;
 }
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index cbf978d39052..490b5254f2c0 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -21,6 +21,7 @@
  * IN THE SOFTWARE.
  *
  */
+#include <linux/static_key.h>
 
 #include "i915_drv.h"
 #include "intel_ringbuffer.h"
@@ -1558,6 +1559,10 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 	}
 }
 
+DEFINE_STATIC_KEY_FALSE(i915_engine_stats_key);
+static DEFINE_MUTEX(i915_engine_stats_mutex);
+static int i915_engine_stats_ref;
+
 /**
  * intel_enable_engine_stats() - Enable engine busy tracking on engine
  * @engine: engine to enable stats collection
@@ -1573,6 +1578,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 	if (!i915.enable_execlists)
 		return -ENODEV;
 
+	mutex_lock(&i915_engine_stats_mutex);
+
 	spin_lock_irqsave(&engine->stats.lock, flags);
 	if (engine->stats.enabled == ~0)
 		goto busy;
@@ -1580,10 +1587,16 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 		engine->stats.enabled_at = ktime_get();
 	spin_unlock_irqrestore(&engine->stats.lock, flags);
 
+	if (i915_engine_stats_ref++ == 0)
+		static_branch_enable(&i915_engine_stats_key);
+
+	mutex_unlock(&i915_engine_stats_mutex);
+
 	return 0;
 
 busy:
 	spin_unlock_irqrestore(&engine->stats.lock, flags);
+	mutex_unlock(&i915_engine_stats_mutex);
 
 	return -EBUSY;
 }
@@ -1601,6 +1614,7 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine)
 	if (!i915.enable_execlists)
 		return;
 
+	mutex_lock(&i915_engine_stats_mutex);
 	spin_lock_irqsave(&engine->stats.lock, flags);
 	WARN_ON_ONCE(engine->stats.enabled == 0);
 	if (--engine->stats.enabled == 0) {
@@ -1610,6 +1624,9 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine)
 		engine->stats.total = 0;
 	}
 	spin_unlock_irqrestore(&engine->stats.lock, flags);
+	if (--i915_engine_stats_ref == 0)
+		static_branch_disable(&i915_engine_stats_key);
+	mutex_unlock(&i915_engine_stats_mutex);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index cb58ce34ab13..a64c288ef589 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -270,6 +270,22 @@ struct intel_engine_cs {
 		 * 		requested.
 		 */
 		bool busy_stats;
+		/**
+		 * @enable_busy_stats: Work item for engine busy stats enabling.
+		 *
+		 * Since the action can sleep it needs to be decoupled from the
+		 * perf API callback.
+		 */
+		struct work_struct enable_busy_stats;
+		/**
+		 * @disable_busy_stats: Work item for busy stats disabling.
+		 *
+		 * Same as with @enable_busy_stats action, with the difference
+		 * that we delay it in case there are rapid enable-disable
+		 * actions, which can happen during tool startup (like perf
+		 * stat).
+		 */
+		struct delayed_work disable_busy_stats;
 	} pmu;
 
 	/*
@@ -805,59 +821,68 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine);
 struct intel_engine_cs *
 intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
 
+DECLARE_STATIC_KEY_FALSE(i915_engine_stats_key);
+
 static inline void intel_engine_context_in(struct intel_engine_cs *engine)
 {
 	unsigned long flags;
 
-	if (READ_ONCE(engine->stats.enabled) == 0)
-		return;
+	if (static_branch_unlikely(&i915_engine_stats_key)) {
+		if (READ_ONCE(engine->stats.enabled) == 0)
+			return;
 
-	spin_lock_irqsave(&engine->stats.lock, flags);
+		spin_lock_irqsave(&engine->stats.lock, flags);
 
-	if (engine->stats.enabled > 0) {
-		if (engine->stats.active++ == 0)
-			engine->stats.start = ktime_get();
-		GEM_BUG_ON(engine->stats.active == 0);
-	}
+			if (engine->stats.enabled > 0) {
+				if (engine->stats.active++ == 0)
+					engine->stats.start = ktime_get();
+				GEM_BUG_ON(engine->stats.active == 0);
+			}
 
-	spin_unlock_irqrestore(&engine->stats.lock, flags);
+		spin_unlock_irqrestore(&engine->stats.lock, flags);
+	}
 }
 
 static inline void intel_engine_context_out(struct intel_engine_cs *engine)
 {
 	unsigned long flags;
 
-	if (READ_ONCE(engine->stats.enabled) == 0)
-		return;
-
-	spin_lock_irqsave(&engine->stats.lock, flags);
-
-	if (engine->stats.enabled > 0) {
-		ktime_t last, now = ktime_get();
-
-		if (engine->stats.active && --engine->stats.active == 0) {
-			/*
-			 * Decrement the active context count and in case GPU
-			 * is now idle add up to the running total.
-			 */
-			last = ktime_sub(now, engine->stats.start);
-
-			engine->stats.total = ktime_add(engine->stats.total,
-							last);
-		} else if (engine->stats.active == 0) {
-			/*
-			 * After turning on engine stats, context out might be
-			 * the first event in which case we account from the
-			 * time stats gathering was turned on.
-			 */
-			last = ktime_sub(now, engine->stats.enabled_at);
-
-			engine->stats.total = ktime_add(engine->stats.total,
-							last);
+	if (static_branch_unlikely(&i915_engine_stats_key)) {
+		if (READ_ONCE(engine->stats.enabled) == 0)
+			return;
+
+		spin_lock_irqsave(&engine->stats.lock, flags);
+
+		if (engine->stats.enabled > 0) {
+			ktime_t last, now = ktime_get();
+
+			if (engine->stats.active &&
+			    --engine->stats.active == 0) {
+				/*
+				 * Decrement the active context count and in
+				 * case GPU is now idle add up to the running
+				 * total.
+				 */
+				last = ktime_sub(now, engine->stats.start);
+
+				engine->stats.total =
+					ktime_add(engine->stats.total, last);
+			} else if (engine->stats.active == 0) {
+				/*
+				 * After turning on engine stats, context out
+				 * might be the first event in which case we
+				 * account from the time stats gathering was
+				 * turned on.
+				 */
+				last = ktime_sub(now, engine->stats.enabled_at);
+
+				engine->stats.total =
+					ktime_add(engine->stats.total, last);
+			}
 		}
-	}
 
-	spin_unlock_irqrestore(&engine->stats.lock, flags);
+		spin_unlock_irqrestore(&engine->stats.lock, flags);
+	}
 }
 
 int intel_enable_engine_stats(struct intel_engine_cs *engine);
-- 
2.9.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2017-09-19  8:43 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-09-18 11:38 [PATCH v4 00/8] i915 PMU and engine busy stats Tvrtko Ursulin
2017-09-18 11:38 ` [PATCH 1/8] drm/i915: Convert intel_rc6_residency_us to ns Tvrtko Ursulin
2017-09-18 11:38 ` [PATCH 2/8] drm/i915: Extract intel_get_cagf Tvrtko Ursulin
2017-09-18 11:38 ` [PATCH 3/8] drm/i915/pmu: Expose a PMU interface for perf queries Tvrtko Ursulin
2017-09-20 16:02   ` [PATCH v10 " Tvrtko Ursulin
2017-09-18 11:38 ` [PATCH 4/8] drm/i915/pmu: Suspend sampling when GPU is idle Tvrtko Ursulin
2017-09-18 11:38 ` [PATCH 5/8] drm/i915: Wrap context schedule notification Tvrtko Ursulin
2017-09-18 11:38 ` [PATCH 6/8] drm/i915: Engine busy time tracking Tvrtko Ursulin
2017-09-18 11:38 ` [PATCH 7/8] drm/i915/pmu: Wire up engine busy stats to PMU Tvrtko Ursulin
2017-09-18 14:58   ` Chris Wilson
2017-09-19  8:46     ` Tvrtko Ursulin
2017-09-18 11:38 ` [PATCH 8/8] drm/i915: Gate engine stats collection with a static key Tvrtko Ursulin
2017-09-19  0:38   ` Rogozhkin, Dmitry V
2017-09-19  8:43     ` Tvrtko Ursulin [this message]
2017-09-18 14:00 ` ✓ Fi.CI.BAT: success for i915 PMU and engine busy stats (rev9) Patchwork
2017-09-18 15:37 ` [PATCH v4 00/8] i915 PMU and engine busy stats Chris Wilson
2017-09-19  8:15   ` Tvrtko Ursulin
2017-09-18 16:30 ` ✓ Fi.CI.IGT: success for i915 PMU and engine busy stats (rev9) Patchwork
2017-09-19 18:33 ` ✓ Fi.CI.BAT: success for i915 PMU and engine busy stats (rev10) Patchwork
2017-09-19 21:00 ` ✓ Fi.CI.IGT: " Patchwork
2017-09-20 16:29 ` ✗ Fi.CI.BAT: warning for i915 PMU and engine busy stats (rev11) Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170919084321.15656-1-tvrtko.ursulin@linux.intel.com \
    --to=tursulin@ursulin.net \
    --cc=Intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox