From: Tvrtko Ursulin <tursulin@ursulin.net>
To: Intel-gfx@lists.freedesktop.org
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Subject: [RFC 14/14] drm/i915/pmu: Wire up engine busy stats to PMU
Date: Tue, 18 Jul 2017 15:36:18 +0100 [thread overview]
Message-ID: <20170718143618.12254-15-tvrtko.ursulin@linux.intel.com> (raw)
In-Reply-To: <20170718143618.12254-1-tvrtko.ursulin@linux.intel.com>
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
We can use engine busy stats instead of the MMIO sampling timer
for better efficiency.
As a minimum this saves period * num_engines / sec MMIO reads,
and in a better case, when only engine busy samplers are active,
it enables us to not kick off the sampling timer at all.
It is also more accurate since it doesn't rely on sampling.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 3 ++
drivers/gpu/drm/i915/i915_pmu.c | 64 +++++++++++++++++++++++++++++++++++++----
2 files changed, 61 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f1fded6dd9cf..2986a01660d8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2603,8 +2603,11 @@ struct drm_i915_private {
spinlock_t lock;
struct hrtimer timer;
bool timer_enabled;
+ bool busy_stats;
u64 enable;
u64 sample[__I915_NUM_PMU_SAMPLERS];
+ struct work_struct enable_busy_stats;
+ struct delayed_work disable_busy_stats;
} pmu;
/*
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 4b113cad40d1..996612843594 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -29,12 +29,23 @@ static const unsigned int user_engine_map[I915_NUM_ENGINES] = {
[I915_SAMPLE_VECS] = VECS,
};
+#define ENGINE_BUSY_BITS \
+ (BIT_ULL(I915_PMU_COUNT_RCS_BUSY) | \
+ BIT_ULL(I915_PMU_COUNT_BCS_BUSY) | \
+ BIT_ULL(I915_PMU_COUNT_VCS_BUSY) | \
+ BIT_ULL(I915_PMU_COUNT_VCS2_BUSY) | \
+ BIT_ULL(I915_PMU_COUNT_VECS_BUSY))
+
static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
{
- if (gpu_active)
- return i915->pmu.enable;
- else
- return i915->pmu.enable >> 32;
+ u64 mask = i915->pmu.enable;
+
+ if (!gpu_active)
+ mask >>= 32;
+ else if (i915->pmu.busy_stats)
+ mask &= ~ENGINE_BUSY_BITS;
+
+ return mask;
}
void i915_pmu_gt_idle(struct drm_i915_private *i915)
@@ -110,7 +121,8 @@ static void engines_sample(struct drm_i915_private *dev_priv)
if (sample_mask & BIT(I915_SAMPLE_QUEUED))
engine->pmu_sample[I915_SAMPLE_QUEUED] += PERIOD;
- if (sample_mask & BIT(I915_SAMPLE_BUSY)) {
+ if ((sample_mask & BIT(I915_SAMPLE_BUSY)) &&
+ !dev_priv->pmu.busy_stats) {
fw = grab_forcewake(dev_priv, fw);
val = I915_READ_FW(RING_MI_MODE(engine->mmio_base));
if (!(val & MODE_IDLE))
@@ -337,6 +349,11 @@ static void i915_pmu_timer_cancel(struct perf_event *event)
hrtimer_cancel(&hwc->hrtimer);
}
+static bool supports_busy_stats(void)
+{
+ return i915.enable_execlists;
+}
+
static void i915_pmu_enable(struct perf_event *event)
{
struct drm_i915_private *i915 =
@@ -345,6 +362,13 @@ static void i915_pmu_enable(struct perf_event *event)
spin_lock_irqsave(&i915->pmu.lock, flags);
+ if (pmu_config_sampler(event->attr.config) == I915_SAMPLE_BUSY &&
+ supports_busy_stats() && !i915->pmu.busy_stats) {
+ i915->pmu.busy_stats = true;
+ if (!cancel_delayed_work(&i915->pmu.disable_busy_stats))
+ queue_work(i915->wq, &i915->pmu.enable_busy_stats);
+ }
+
i915->pmu.enable |= BIT_ULL(event->attr.config);
if (pmu_needs_timer(i915, true) && !i915->pmu.timer_enabled) {
hrtimer_start_range_ns(&i915->pmu.timer,
@@ -367,6 +391,11 @@ static void i915_pmu_disable(struct perf_event *event)
spin_lock_irqsave(&i915->pmu.lock, flags);
i915->pmu.enable &= ~BIT_ULL(event->attr.config);
i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
+ if (!(i915->pmu.enable & ENGINE_BUSY_BITS) && i915->pmu.busy_stats) {
+ i915->pmu.busy_stats = false;
+ queue_delayed_work(i915->wq, &i915->pmu.disable_busy_stats,
+ round_jiffies_up_relative(2 * HZ));
+ }
spin_unlock_irqrestore(&i915->pmu.lock, flags);
i915_pmu_timer_cancel(event);
@@ -471,7 +500,12 @@ static void i915_pmu_event_read(struct perf_event *event)
/* Do nothing */
} else {
enum intel_engine_id id = user_engine_map[user_engine];
- val = i915->engine[id]->pmu_sample[sample];
+ struct intel_engine_cs *engine = i915->engine[id];
+
+ if (i915->pmu.busy_stats && sample == I915_SAMPLE_BUSY)
+ val = intel_engine_get_current_busy_ns(engine);
+ else
+ val = engine->pmu_sample[sample];
}
} else switch (event->attr.config) {
case I915_PMU_ACTUAL_FREQUENCY:
@@ -607,6 +641,19 @@ static const struct attribute_group *i915_pmu_attr_groups[] = {
NULL
};
+static void __enable_busy_stats(struct work_struct *work)
+{
+ struct drm_i915_private *i915 =
+ container_of(work, typeof(*i915), pmu.enable_busy_stats);
+
+ WARN_ON_ONCE(intel_enable_engine_stats(i915));
+}
+
+static void __disable_busy_stats(struct work_struct *work)
+{
+ intel_disable_engine_stats();
+}
+
void i915_pmu_register(struct drm_i915_private *i915)
{
if (INTEL_GEN(i915) <= 2)
@@ -624,6 +671,8 @@ void i915_pmu_register(struct drm_i915_private *i915)
spin_lock_init(&i915->pmu.lock);
hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ INIT_WORK(&i915->pmu.enable_busy_stats, __enable_busy_stats);
+ INIT_DELAYED_WORK(&i915->pmu.disable_busy_stats, __disable_busy_stats);
i915->pmu.timer.function = i915_sample;
i915->pmu.enable = 0;
@@ -642,4 +691,7 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
i915->pmu.base.event_init = NULL;
hrtimer_cancel(&i915->pmu.timer);
+
+ flush_work(&i915->pmu.enable_busy_stats);
+ flush_delayed_work(&i915->pmu.disable_busy_stats);
}
--
2.9.4
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2017-07-18 14:36 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-07-18 14:36 [RFC 00/14] i915 PMU and engine busy stats Tvrtko Ursulin
2017-07-18 14:36 ` [RFC 01/14] RFC drm/i915: Expose a PMU interface for perf queries Tvrtko Ursulin
2017-07-19 9:53 ` Kamble, Sagar A
2017-07-20 8:55 ` Tvrtko Ursulin
2017-07-25 1:09 ` Ben Widawsky
2017-07-18 14:36 ` [RFC 02/14] drm/i915/pmu: Add VCS2 engine to the PMU uAPI Tvrtko Ursulin
2017-07-18 14:36 ` [RFC 03/14] drm/i915/pmu: Add queued samplers " Tvrtko Ursulin
2017-07-18 14:36 ` [RFC 04/14] drm/i915/pmu: Decouple uAPI engine ids Tvrtko Ursulin
2017-07-25 1:18 ` Ben Widawsky
2017-07-26 9:04 ` Tvrtko Ursulin
2017-07-18 14:36 ` [RFC 05/14] drm/i915/pmu: Helper to extract engine and sampler from PMU config Tvrtko Ursulin
2017-07-18 14:36 ` [RFC 06/14] drm/i915/pmu: Only sample enabled samplers Tvrtko Ursulin
2017-07-18 14:36 ` [RFC 07/14] drm/i915/pmu: Add fake regs Tvrtko Ursulin
2017-07-25 1:20 ` Ben Widawsky
2017-07-26 9:07 ` Tvrtko Ursulin
2017-07-18 14:36 ` [RFC 08/14] drm/i915/pmu: Expose events in sysfs Tvrtko Ursulin
2017-07-18 14:36 ` [RFC 09/14] drm/i915/pmu: Suspend sampling when GPU is idle Tvrtko Ursulin
2017-07-18 14:36 ` [RFC 10/14] drm/i915: Wrap context schedule notification Tvrtko Ursulin
2017-07-18 14:36 ` [RFC 11/14] drm/i915: Engine busy time tracking Tvrtko Ursulin
2017-07-18 15:19 ` Chris Wilson
2017-07-19 9:12 ` Tvrtko Ursulin
2017-07-19 10:46 ` Chris Wilson
2017-07-18 14:36 ` [RFC 12/14] drm/i915: Interface for controling engine stats collection Tvrtko Ursulin
2017-07-18 15:22 ` Chris Wilson
2017-07-19 9:30 ` Tvrtko Ursulin
2017-07-19 11:04 ` Chris Wilson
2017-07-20 9:07 ` Tvrtko Ursulin
2017-07-18 15:43 ` Chris Wilson
2017-07-18 18:43 ` Chris Wilson
2017-07-19 9:34 ` Tvrtko Ursulin
2017-07-25 1:28 ` Ben Widawsky
2017-07-18 14:36 ` [RFC 13/14] drm/i915: Export engine busy stats in debugfs Tvrtko Ursulin
2017-07-18 14:36 ` Tvrtko Ursulin [this message]
2017-07-18 14:58 ` ✓ Fi.CI.BAT: success for i915 PMU and engine busy stats Patchwork
2017-07-19 12:05 ` [RFC 00/14] " Chris Wilson
2017-07-20 9:03 ` Tvrtko Ursulin
2017-07-26 10:34 ` Tvrtko Ursulin
2017-07-26 10:55 ` Chris Wilson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170718143618.12254-15-tvrtko.ursulin@linux.intel.com \
--to=tursulin@ursulin.net \
--cc=Intel-gfx@lists.freedesktop.org \
--cc=benjamin.widawsky@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox