public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tursulin@ursulin.net>
To: Intel-gfx@lists.freedesktop.org
Subject: [RFC 09/10] drm/i915/pmu: Wire up engine busy stats to PMU
Date: Wed,  2 Aug 2017 13:32:49 +0100	[thread overview]
Message-ID: <20170802123249.14194-10-tvrtko.ursulin@linux.intel.com> (raw)
In-Reply-To: <20170802123249.14194-1-tvrtko.ursulin@linux.intel.com>

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

We can use engine busy stats instead of the MMIO sampling timer
for better efficiency.

As minimum this saves period * num_engines / sec mmio reads,
and in a better case, when only engine busy samplers are active,
it enables us to not kick off the sampling timer at all.

v2: Rebase.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_pmu.c         | 86 ++++++++++++++++++++++++++++-----
 drivers/gpu/drm/i915/intel_ringbuffer.h |  3 ++
 2 files changed, 77 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 0d9c0d07a432..3272ec0763bf 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -59,6 +59,11 @@ static u64 event_enabled_mask(struct perf_event *event)
 	return config_enabled_mask(event->attr.config);
 }
 
+static bool supports_busy_stats(void)
+{
+	return i915.enable_execlists;
+}
+
 static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
 {
 	u64 enable = i915->pmu.enable;
@@ -69,6 +74,8 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
 
 	if (!gpu_active)
 		enable &= ~ENGINE_SAMPLE_MASK;
+	else if (supports_busy_stats())
+		enable &= ~BIT(I915_SAMPLE_BUSY);
 
 	return enable;
 }
@@ -132,7 +139,8 @@ static void engines_sample(struct drm_i915_private *dev_priv)
 		if (enable & BIT(I915_SAMPLE_QUEUED))
 			engine->pmu.sample[I915_SAMPLE_QUEUED] += PERIOD;
 
-		if (enable & BIT(I915_SAMPLE_BUSY)) {
+		if ((enable & BIT(I915_SAMPLE_BUSY)) &&
+		    !engine->pmu.busy_stats) {
 			u32 val;
 
 			fw = grab_forcewake(dev_priv, fw);
@@ -349,14 +357,29 @@ static void i915_pmu_timer_cancel(struct perf_event *event)
 	hrtimer_cancel(&hwc->hrtimer);
 }
 
+static bool engine_needs_busy_stats(struct intel_engine_cs *engine)
+{
+	return supports_busy_stats() &&
+	       (engine->pmu.enable & BIT(I915_SAMPLE_BUSY));
+}
+
 static void i915_pmu_enable(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct intel_engine_cs *engine = NULL;
 	unsigned long flags;
 
 	spin_lock_irqsave(&i915->pmu.lock, flags);
 
+	if (is_engine_event(event)) {
+		engine = intel_engine_lookup_user(i915,
+						  engine_event_class(event),
+						  engine_event_instance(event));
+		GEM_BUG_ON(!engine);
+		engine->pmu.enable |= BIT(engine_event_sample(event));
+	}
+
 	i915->pmu.enable |= event_enabled_mask(event);
 
 	if (pmu_needs_timer(i915, true) && !i915->pmu.timer_enabled) {
@@ -364,16 +387,11 @@ static void i915_pmu_enable(struct perf_event *event)
 				       ns_to_ktime(PERIOD), 0,
 				       HRTIMER_MODE_REL_PINNED);
 		i915->pmu.timer_enabled = true;
-	}
-
-	if (is_engine_event(event)) {
-		struct intel_engine_cs *engine;
-
-		engine = intel_engine_lookup_user(i915,
-						  engine_event_class(event),
-						  engine_event_instance(event));
-		GEM_BUG_ON(!engine);
-		engine->pmu.enable |= BIT(engine_event_sample(event));
+	} else if (is_engine_event(event) && engine_needs_busy_stats(engine) &&
+		   !engine->pmu.busy_stats) {
+		engine->pmu.busy_stats = true;
+		if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
+			queue_work(i915->wq, &engine->pmu.enable_busy_stats);
 	}
 
 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
@@ -399,10 +417,17 @@ static void i915_pmu_disable(struct perf_event *event)
 						  engine_event_instance(event));
 		GEM_BUG_ON(!engine);
 		engine->pmu.enable &= ~BIT(engine_event_sample(event));
+		if (engine->pmu.busy_stats &&
+		    !engine_needs_busy_stats(engine)) {
+			engine->pmu.busy_stats = false;
+			queue_delayed_work(i915->wq,
+					   &engine->pmu.disable_busy_stats,
+					   round_jiffies_up_relative(2 * HZ));
+		}
 		mask = 0;
 		for_each_engine(engine, i915, id)
 			mask |= engine->pmu.enable;
-		mask = ~mask;
+		mask = (~mask) & ENGINE_SAMPLE_MASK;
 	} else {
 		mask = event_enabled_mask(event);
 	}
@@ -474,6 +499,9 @@ static void i915_pmu_event_read(struct perf_event *event)
 
 		if (WARN_ON_ONCE(!engine)) {
 			/* Do nothing */
+		} else if (sample == I915_SAMPLE_BUSY &&
+			   engine->pmu.busy_stats) {
+			val = ktime_to_ns(intel_engine_get_busy_time(engine));
 		} else {
 			val = engine->pmu.sample[sample];
 		}
@@ -634,8 +662,27 @@ static const struct attribute_group *i915_pmu_attr_groups[] = {
         NULL
 };
 
+static void __enable_busy_stats(struct work_struct *work)
+{
+	struct intel_engine_cs *engine =
+		container_of(work, typeof(*engine), pmu.enable_busy_stats);
+
+	WARN_ON_ONCE(intel_enable_engine_stats(engine));
+}
+
+static void __disable_busy_stats(struct work_struct *work)
+{
+	struct intel_engine_cs *engine =
+	       container_of(work, typeof(*engine), pmu.disable_busy_stats.work);
+
+	intel_disable_engine_stats(engine);
+}
+
 void i915_pmu_register(struct drm_i915_private *i915)
 {
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
 	if (INTEL_GEN(i915) <= 2)
 		return;
 
@@ -651,6 +698,13 @@ void i915_pmu_register(struct drm_i915_private *i915)
 
 	spin_lock_init(&i915->pmu.lock);
 	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+	for_each_engine(engine, i915, id) {
+		INIT_WORK(&engine->pmu.enable_busy_stats, __enable_busy_stats);
+		INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats,
+			  __disable_busy_stats);
+	}
+
 	i915->pmu.timer.function = i915_sample;
 	i915->pmu.enable = 0;
 
@@ -660,6 +714,9 @@ void i915_pmu_register(struct drm_i915_private *i915)
 
 void i915_pmu_unregister(struct drm_i915_private *i915)
 {
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
 	if (!i915->pmu.base.event_init)
 		return;
 
@@ -669,4 +726,9 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
 	i915->pmu.base.event_init = NULL;
 
 	hrtimer_cancel(&i915->pmu.timer);
+
+	for_each_engine(engine, i915, id) {
+		flush_work(&engine->pmu.enable_busy_stats);
+		flush_delayed_work(&engine->pmu.disable_busy_stats);
+	}
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 68f50ec72be6..fd5d838ca7b5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -248,6 +248,9 @@ struct intel_engine_cs {
 	struct {
 		u32 enable;
 		u64 sample[4];
+		bool busy_stats;
+		struct work_struct enable_busy_stats;
+		struct delayed_work disable_busy_stats;
 	} pmu;
 
 	/*
-- 
2.9.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2017-08-02 12:33 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-02 12:32 [RFC v2 00/10] i915 PMU and engine busy stats Tvrtko Ursulin
2017-08-02 12:32 ` [RFC 01/10] drm/i915: Convert intel_rc6_residency_us to ns Tvrtko Ursulin
2017-08-02 12:32 ` [RFC 02/10] drm/i915: Add intel_energy_uJ Tvrtko Ursulin
2017-08-02 12:32 ` [RFC 03/10] drm/i915: Extract intel_get_cagf Tvrtko Ursulin
2017-08-02 12:32 ` [RFC 04/10] drm/i915: Expose a PMU interface for perf queries Tvrtko Ursulin
2017-08-02 13:00   ` Tvrtko Ursulin
2017-08-12  2:15     ` Rogozhkin, Dmitry V
2017-08-22 18:17       ` Peter Zijlstra
2017-08-23 17:51         ` Rogozhkin, Dmitry V
2017-08-23 18:01           ` Peter Zijlstra
2017-08-23 18:40             ` Rogozhkin, Dmitry V
2017-08-23 18:04           ` Peter Zijlstra
2017-08-23 18:38             ` Rogozhkin, Dmitry V
2017-08-23 19:28               ` Peter Zijlstra
2017-08-23 18:05           ` Peter Zijlstra
2017-08-23 18:22           ` Peter Zijlstra
2017-08-23 19:00             ` Rogozhkin, Dmitry V
2017-08-23 19:33               ` Peter Zijlstra
2017-08-28 22:57             ` Rogozhkin, Dmitry V
2017-08-29  9:30               ` Peter Zijlstra
2017-08-29 19:16                 ` Rogozhkin, Dmitry V
2017-08-29 19:21                   ` Peter Zijlstra
2017-09-13 23:05                     ` Rogozhkin, Dmitry V
2017-09-20 20:14                       ` Rogozhkin, Dmitry V
2017-10-02 21:28                         ` Rogozhkin, Dmitry V
2017-08-02 12:32 ` [RFC 05/10] drm/i915/pmu: Suspend sampling when GPU is idle Tvrtko Ursulin
2017-08-02 12:32 ` [RFC 06/10] drm/i915: Wrap context schedule notification Tvrtko Ursulin
2017-08-02 12:32 ` [RFC 07/10] drm/i915: Engine busy time tracking Tvrtko Ursulin
2017-08-02 12:32 ` [RFC 08/10] drm/i915: Export engine busy stats in debugfs Tvrtko Ursulin
2017-08-02 12:32 ` Tvrtko Ursulin [this message]
2017-08-02 12:39 ` [RFC 10/10] drm/i915: Gate engine stats collection with a static key Tvrtko Ursulin
2017-08-02 12:56 ` ✗ Fi.CI.BAT: warning for i915 PMU and engine busy stats (rev2) Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170802123249.14194-10-tvrtko.ursulin@linux.intel.com \
    --to=tursulin@ursulin.net \
    --cc=Intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox