All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tursulin@ursulin.net>
To: Intel-gfx@lists.freedesktop.org
Subject: [PATCH v7 8/8] drm/i915: Gate engine stats collection with a static key
Date: Tue, 19 Sep 2017 09:43:21 +0100	[thread overview]
Message-ID: <20170919084321.15656-1-tvrtko.ursulin@linux.intel.com> (raw)
In-Reply-To: <1505752565.8257.1.camel@intel.com>

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

This reduces the cost of the software engine busyness tracking
to a single no-op instruction when there are no listeners.

v2: Rebase and some comments.
v3: Rebase.
v4: Checkpatch fixes.
v5: Rebase.
v6: Use system_long_wq to avoid being blocked by struct_mutex
    users.
v7: Fix bad conflict resolution from last rebase. (Dmitry Rogozhkin)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_pmu.c         |  54 +++++++++++++++--
 drivers/gpu/drm/i915/intel_engine_cs.c  |  17 ++++++
 drivers/gpu/drm/i915/intel_ringbuffer.h | 101 ++++++++++++++++++++------------
 3 files changed, 130 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index ffba21eeb5d0..6d22172b8fb0 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -499,11 +499,17 @@ static void i915_pmu_enable(struct perf_event *event)
 		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
 		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
 		if (engine->pmu.enable_count[sample]++ == 0) {
+			/*
+			 * Enable engine busy stats tracking if needed or
+			 * alternatively cancel the scheduled disabling of the
+			 * same.
+			 */
 			if (engine_needs_busy_stats(engine) &&
 			    !engine->pmu.busy_stats) {
-				engine->pmu.busy_stats =
-					intel_enable_engine_stats(engine) == 0;
-				WARN_ON_ONCE(!engine->pmu.busy_stats);
+				engine->pmu.busy_stats = true;
+				if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
+					queue_work(system_long_wq,
+						   &engine->pmu.enable_busy_stats);
 			}
 		}
 	}
@@ -546,7 +552,15 @@ static void i915_pmu_disable(struct perf_event *event)
 			if (!engine_needs_busy_stats(engine) &&
 			    engine->pmu.busy_stats) {
 				engine->pmu.busy_stats = false;
-				intel_disable_engine_stats(engine);
+				/*
+				 * We request a delayed disable to handle, in a
+				 * nicer way, the rapid on/off cycles on events
+				 * which can happen when tools like perf stat
+				 * start.
+				 */
+				queue_delayed_work(system_long_wq,
+						   &engine->pmu.disable_busy_stats,
+						   round_jiffies_up_relative(HZ));
 			}
 		}
 	}
@@ -737,9 +751,27 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
 	return 0;
 }
 
+static void __enable_busy_stats(struct work_struct *work)
+{
+	struct intel_engine_cs *engine =
+		container_of(work, typeof(*engine), pmu.enable_busy_stats);
+
+	WARN_ON_ONCE(intel_enable_engine_stats(engine));
+}
+
+static void __disable_busy_stats(struct work_struct *work)
+{
+	struct intel_engine_cs *engine =
+	       container_of(work, typeof(*engine), pmu.disable_busy_stats.work);
+
+	intel_disable_engine_stats(engine);
+}
+
 void i915_pmu_register(struct drm_i915_private *i915)
 {
 	int ret;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 
 	if (INTEL_GEN(i915) <= 2) {
 		DRM_INFO("PMU not supported for this GPU.");
@@ -773,6 +805,12 @@ void i915_pmu_register(struct drm_i915_private *i915)
 	i915->pmu.timer.function = i915_sample;
 	i915->pmu.enable = 0;
 
+	for_each_engine(engine, i915, id) {
+		INIT_WORK(&engine->pmu.enable_busy_stats, __enable_busy_stats);
+		INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats,
+				  __disable_busy_stats);
+	}
+
 	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
 	if (ret == 0)
 		return;
@@ -791,6 +829,9 @@ void i915_pmu_register(struct drm_i915_private *i915)
 
 void i915_pmu_unregister(struct drm_i915_private *i915)
 {
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
 	if (!i915->pmu.base.event_init)
 		return;
 
@@ -802,6 +843,11 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
 
 	hrtimer_cancel(&i915->pmu.timer);
 
+	for_each_engine(engine, i915, id) {
+		flush_work(&engine->pmu.enable_busy_stats);
+		flush_delayed_work(&engine->pmu.disable_busy_stats);
+	}
+
 	perf_pmu_unregister(&i915->pmu.base);
 	i915->pmu.base.event_init = NULL;
 }
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index cbf978d39052..490b5254f2c0 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -21,6 +21,7 @@
  * IN THE SOFTWARE.
  *
  */
+#include <linux/static_key.h>
 
 #include "i915_drv.h"
 #include "intel_ringbuffer.h"
@@ -1558,6 +1559,10 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 	}
 }
 
+DEFINE_STATIC_KEY_FALSE(i915_engine_stats_key);
+static DEFINE_MUTEX(i915_engine_stats_mutex);
+static int i915_engine_stats_ref;
+
 /**
  * intel_enable_engine_stats() - Enable engine busy tracking on engine
  * @engine: engine to enable stats collection
@@ -1573,6 +1578,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 	if (!i915.enable_execlists)
 		return -ENODEV;
 
+	mutex_lock(&i915_engine_stats_mutex);
+
 	spin_lock_irqsave(&engine->stats.lock, flags);
 	if (engine->stats.enabled == ~0)
 		goto busy;
@@ -1580,10 +1587,16 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 		engine->stats.enabled_at = ktime_get();
 	spin_unlock_irqrestore(&engine->stats.lock, flags);
 
+	if (i915_engine_stats_ref++ == 0)
+		static_branch_enable(&i915_engine_stats_key);
+
+	mutex_unlock(&i915_engine_stats_mutex);
+
 	return 0;
 
 busy:
 	spin_unlock_irqrestore(&engine->stats.lock, flags);
+	mutex_unlock(&i915_engine_stats_mutex);
 
 	return -EBUSY;
 }
@@ -1601,6 +1614,7 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine)
 	if (!i915.enable_execlists)
 		return;
 
+	mutex_lock(&i915_engine_stats_mutex);
 	spin_lock_irqsave(&engine->stats.lock, flags);
 	WARN_ON_ONCE(engine->stats.enabled == 0);
 	if (--engine->stats.enabled == 0) {
@@ -1610,6 +1624,9 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine)
 		engine->stats.total = 0;
 	}
 	spin_unlock_irqrestore(&engine->stats.lock, flags);
+	if (--i915_engine_stats_ref == 0)
+		static_branch_disable(&i915_engine_stats_key);
+	mutex_unlock(&i915_engine_stats_mutex);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index cb58ce34ab13..a64c288ef589 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -270,6 +270,22 @@ struct intel_engine_cs {
 		 * 		requested.
 		 */
 		bool busy_stats;
+		/**
+		 * @enable_busy_stats: Work item for engine busy stats enabling.
+		 *
+		 * Since the action can sleep it needs to be decoupled from the
+		 * perf API callback.
+		 */
+		struct work_struct enable_busy_stats;
+		/**
+		 * @disable_busy_stats: Work item for busy stats disabling.
+		 *
+		 * Same as the @enable_busy_stats action, except that it is
+		 * delayed to absorb rapid enable-disable cycles, which can
+		 * happen during tool startup (for example with perf
+		 * stat).
+		 */
+		struct delayed_work disable_busy_stats;
 	} pmu;
 
 	/*
@@ -805,59 +821,68 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine);
 struct intel_engine_cs *
 intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
 
+DECLARE_STATIC_KEY_FALSE(i915_engine_stats_key);
+
 static inline void intel_engine_context_in(struct intel_engine_cs *engine)
 {
 	unsigned long flags;
 
-	if (READ_ONCE(engine->stats.enabled) == 0)
-		return;
+	if (static_branch_unlikely(&i915_engine_stats_key)) {
+		if (READ_ONCE(engine->stats.enabled) == 0)
+			return;
 
-	spin_lock_irqsave(&engine->stats.lock, flags);
+		spin_lock_irqsave(&engine->stats.lock, flags);
 
-	if (engine->stats.enabled > 0) {
-		if (engine->stats.active++ == 0)
-			engine->stats.start = ktime_get();
-		GEM_BUG_ON(engine->stats.active == 0);
-	}
+			if (engine->stats.enabled > 0) {
+				if (engine->stats.active++ == 0)
+					engine->stats.start = ktime_get();
+				GEM_BUG_ON(engine->stats.active == 0);
+			}
 
-	spin_unlock_irqrestore(&engine->stats.lock, flags);
+		spin_unlock_irqrestore(&engine->stats.lock, flags);
+	}
 }
 
 static inline void intel_engine_context_out(struct intel_engine_cs *engine)
 {
 	unsigned long flags;
 
-	if (READ_ONCE(engine->stats.enabled) == 0)
-		return;
-
-	spin_lock_irqsave(&engine->stats.lock, flags);
-
-	if (engine->stats.enabled > 0) {
-		ktime_t last, now = ktime_get();
-
-		if (engine->stats.active && --engine->stats.active == 0) {
-			/*
-			 * Decrement the active context count and in case GPU
-			 * is now idle add up to the running total.
-			 */
-			last = ktime_sub(now, engine->stats.start);
-
-			engine->stats.total = ktime_add(engine->stats.total,
-							last);
-		} else if (engine->stats.active == 0) {
-			/*
-			 * After turning on engine stats, context out might be
-			 * the first event in which case we account from the
-			 * time stats gathering was turned on.
-			 */
-			last = ktime_sub(now, engine->stats.enabled_at);
-
-			engine->stats.total = ktime_add(engine->stats.total,
-							last);
+	if (static_branch_unlikely(&i915_engine_stats_key)) {
+		if (READ_ONCE(engine->stats.enabled) == 0)
+			return;
+
+		spin_lock_irqsave(&engine->stats.lock, flags);
+
+		if (engine->stats.enabled > 0) {
+			ktime_t last, now = ktime_get();
+
+			if (engine->stats.active &&
+			    --engine->stats.active == 0) {
+				/*
+				 * Decrement the active context count and in
+				 * case GPU is now idle add up to the running
+				 * total.
+				 */
+				last = ktime_sub(now, engine->stats.start);
+
+				engine->stats.total =
+					ktime_add(engine->stats.total, last);
+			} else if (engine->stats.active == 0) {
+				/*
+				 * After turning on engine stats, context out
+				 * might be the first event in which case we
+				 * account from the time stats gathering was
+				 * turned on.
+				 */
+				last = ktime_sub(now, engine->stats.enabled_at);
+
+				engine->stats.total =
+					ktime_add(engine->stats.total, last);
+			}
 		}
-	}
 
-	spin_unlock_irqrestore(&engine->stats.lock, flags);
+		spin_unlock_irqrestore(&engine->stats.lock, flags);
+	}
 }
 
 int intel_enable_engine_stats(struct intel_engine_cs *engine);
-- 
2.9.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2017-09-19  8:43 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-09-18 11:38 [PATCH v4 00/8] i915 PMU and engine busy stats Tvrtko Ursulin
2017-09-18 11:38 ` [PATCH 1/8] drm/i915: Convert intel_rc6_residency_us to ns Tvrtko Ursulin
2017-09-18 11:38 ` [PATCH 2/8] drm/i915: Extract intel_get_cagf Tvrtko Ursulin
2017-09-18 11:38 ` [PATCH 3/8] drm/i915/pmu: Expose a PMU interface for perf queries Tvrtko Ursulin
2017-09-20 16:02   ` [PATCH v10 " Tvrtko Ursulin
2017-09-18 11:38 ` [PATCH 4/8] drm/i915/pmu: Suspend sampling when GPU is idle Tvrtko Ursulin
2017-09-18 11:38 ` [PATCH 5/8] drm/i915: Wrap context schedule notification Tvrtko Ursulin
2017-09-18 11:38 ` [PATCH 6/8] drm/i915: Engine busy time tracking Tvrtko Ursulin
2017-09-18 11:38 ` [PATCH 7/8] drm/i915/pmu: Wire up engine busy stats to PMU Tvrtko Ursulin
2017-09-18 14:58   ` Chris Wilson
2017-09-19  8:46     ` Tvrtko Ursulin
2017-09-18 11:38 ` [PATCH 8/8] drm/i915: Gate engine stats collection with a static key Tvrtko Ursulin
2017-09-19  0:38   ` Rogozhkin, Dmitry V
2017-09-19  8:43     ` Tvrtko Ursulin [this message]
2017-09-18 14:00 ` ✓ Fi.CI.BAT: success for i915 PMU and engine busy stats (rev9) Patchwork
2017-09-18 15:37 ` [PATCH v4 00/8] i915 PMU and engine busy stats Chris Wilson
2017-09-19  8:15   ` Tvrtko Ursulin
2017-09-18 16:30 ` ✓ Fi.CI.IGT: success for i915 PMU and engine busy stats (rev9) Patchwork
2017-09-19 18:33 ` ✓ Fi.CI.BAT: success for i915 PMU and engine busy stats (rev10) Patchwork
2017-09-19 21:00 ` ✓ Fi.CI.IGT: " Patchwork
2017-09-20 16:29 ` ✗ Fi.CI.BAT: warning for i915 PMU and engine busy stats (rev11) Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170919084321.15656-1-tvrtko.ursulin@linux.intel.com \
    --to=tursulin@ursulin.net \
    --cc=Intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.