From: Tvrtko Ursulin <tursulin@ursulin.net>
To: Intel-gfx@lists.freedesktop.org
Subject: [PATCH v10 07/10] drm/i915: Gate engine stats collection with a static key
Date: Thu, 5 Oct 2017 14:05:26 +0100 [thread overview]
Message-ID: <20171005130526.28624-1-tvrtko.ursulin@linux.intel.com> (raw)
In-Reply-To: <150702584551.20052.11836083647133754478@mail.alporthouse.com>
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
This reduces the cost of the software engine busyness tracking
to a single no-op instruction when there are no listeners.
We add a new i915 ordered workqueue to be used only for tasks
not needing struct mutex.
v2: Rebase and some comments.
v3: Rebase.
v4: Checkpatch fixes.
v5: Rebase.
v6: Use system_long_wq to avoid being blocked by struct_mutex
users.
v7: Fix bad conflict resolution from last rebase. (Dmitry Rogozhkin)
v8: Rebase.
v9:
* Fix race between unordered enable followed by disable.
(Chris Wilson)
* Prettify order of local variable declarations. (Chris Wilson)
v10:
Chris Wilson:
* Fix workqueue allocation failure handling.
* Expand comment about critical workqueue ordering.
* Add some GEM_BUG_ONs to document impossible conditions.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_drv.c | 13 +++-
drivers/gpu/drm/i915/i915_drv.h | 5 ++
drivers/gpu/drm/i915/i915_pmu.c | 65 ++++++++++++++++++--
drivers/gpu/drm/i915/intel_engine_cs.c | 17 ++++++
drivers/gpu/drm/i915/intel_ringbuffer.h | 101 ++++++++++++++++++++------------
5 files changed, 158 insertions(+), 43 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index d373a840566a..3853038c6fdd 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -804,12 +804,22 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv)
if (dev_priv->wq == NULL)
goto out_err;
+ /*
+ * Ordered workqueue for tasks not needing the global mutex but
+ * which still need to be serialized.
+ */
+ dev_priv->wq_misc = alloc_ordered_workqueue("i915-misc", 0);
+ if (dev_priv->wq_misc == NULL)
+ goto out_free_wq;
+
dev_priv->hotplug.dp_wq = alloc_ordered_workqueue("i915-dp", 0);
if (dev_priv->hotplug.dp_wq == NULL)
- goto out_free_wq;
+ goto out_free_wq_misc;
return 0;
+out_free_wq_misc:
+ destroy_workqueue(dev_priv->wq_misc);
out_free_wq:
destroy_workqueue(dev_priv->wq);
out_err:
@@ -830,6 +840,7 @@ static void i915_engines_cleanup(struct drm_i915_private *i915)
static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv)
{
destroy_workqueue(dev_priv->hotplug.dp_wq);
+ destroy_workqueue(dev_priv->wq_misc);
destroy_workqueue(dev_priv->wq);
}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2dabe671bb50..efa3a8423cca 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2349,6 +2349,11 @@ struct drm_i915_private {
*/
struct workqueue_struct *wq;
+ /**
+ * @wq_misc: Workqueue for serialized tasks not needing struct mutex.
+ */
+ struct workqueue_struct *wq_misc;
+
/* Display functions */
struct drm_i915_display_funcs display;
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 2671e3640639..d552e415becd 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -458,11 +458,20 @@ static void i915_pmu_enable(struct perf_event *event)
GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
if (engine->pmu.enable_count[sample]++ == 0) {
+ /*
+ * Enable engine busy stats tracking if needed or
+ * alternatively cancel the scheduled disabling of the
+ * same.
+ *
+ * If the delayed disable was pending, cancel it and in
+ * this case no need to queue a new enable.
+ */
if (engine_needs_busy_stats(engine) &&
!engine->pmu.busy_stats) {
- engine->pmu.busy_stats =
- intel_enable_engine_stats(engine) == 0;
- WARN_ON_ONCE(!engine->pmu.busy_stats);
+ engine->pmu.busy_stats = true;
+ if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
+ queue_work(i915->wq_misc,
+ &engine->pmu.enable_busy_stats);
}
}
}
@@ -505,7 +514,22 @@ static void i915_pmu_disable(struct perf_event *event)
if (!engine_needs_busy_stats(engine) &&
engine->pmu.busy_stats) {
engine->pmu.busy_stats = false;
- intel_disable_engine_stats(engine);
+ /*
+ * We request a delayed disable to handle the
+ * rapid on/off cycles on events, which can
+ * happen when tools like perf stat start, in a
+ * nicer way.
+ *
+ * PMU enable and disable callbacks are
+ * serialized with the spinlock, and since we do
+ * them in decoupled fashion from workers, it is
+ * critical to use an ordered work queue to
+ * preserve this ordering between the two
+ * events.
+ */
+ queue_delayed_work(i915->wq_misc,
+ &engine->pmu.disable_busy_stats,
+ round_jiffies_up_relative(HZ));
}
}
}
@@ -689,8 +713,26 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
return 0;
}
+static void __enable_busy_stats(struct work_struct *work)
+{
+ struct intel_engine_cs *engine =
+ container_of(work, typeof(*engine), pmu.enable_busy_stats);
+
+ WARN_ON_ONCE(intel_enable_engine_stats(engine));
+}
+
+static void __disable_busy_stats(struct work_struct *work)
+{
+ struct intel_engine_cs *engine =
+ container_of(work, typeof(*engine), pmu.disable_busy_stats.work);
+
+ intel_disable_engine_stats(engine);
+}
+
void i915_pmu_register(struct drm_i915_private *i915)
{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
int ret;
if (INTEL_GEN(i915) <= 2) {
@@ -725,6 +767,12 @@ void i915_pmu_register(struct drm_i915_private *i915)
i915->pmu.timer.function = i915_sample;
i915->pmu.enable = 0;
+ for_each_engine(engine, i915, id) {
+ INIT_WORK(&engine->pmu.enable_busy_stats, __enable_busy_stats);
+ INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats,
+ __disable_busy_stats);
+ }
+
ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
if (ret == 0)
return;
@@ -743,6 +791,9 @@ void i915_pmu_register(struct drm_i915_private *i915)
void i915_pmu_unregister(struct drm_i915_private *i915)
{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
if (!i915->pmu.base.event_init)
return;
@@ -754,6 +805,12 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
hrtimer_cancel(&i915->pmu.timer);
+ for_each_engine(engine, i915, id) {
+ GEM_BUG_ON(engine->pmu.busy_stats);
+ GEM_BUG_ON(flush_work(&engine->pmu.enable_busy_stats));
+ flush_delayed_work(&engine->pmu.disable_busy_stats);
+ }
+
perf_pmu_unregister(&i915->pmu.base);
i915->pmu.base.event_init = NULL;
}
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 53d3b8f3c222..cbfde1149822 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -21,6 +21,7 @@
* IN THE SOFTWARE.
*
*/
+#include <linux/static_key.h>
#include "i915_drv.h"
#include "intel_ringbuffer.h"
@@ -1653,6 +1654,10 @@ intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
return i915->engine_class[class][instance];
}
+DEFINE_STATIC_KEY_FALSE(i915_engine_stats_key);
+static DEFINE_MUTEX(i915_engine_stats_mutex);
+static int i915_engine_stats_ref;
+
/**
* intel_enable_engine_stats() - Enable engine busy tracking on engine
* @engine: engine to enable stats collection
@@ -1668,6 +1673,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
if (!i915_modparams.enable_execlists)
return -ENODEV;
+ mutex_lock(&i915_engine_stats_mutex);
+
spin_lock_irqsave(&engine->stats.lock, flags);
if (engine->stats.enabled == ~0)
goto busy;
@@ -1675,10 +1682,16 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
engine->stats.enabled_at = ktime_get();
spin_unlock_irqrestore(&engine->stats.lock, flags);
+ if (i915_engine_stats_ref++ == 0)
+ static_branch_enable(&i915_engine_stats_key);
+
+ mutex_unlock(&i915_engine_stats_mutex);
+
return 0;
busy:
spin_unlock_irqrestore(&engine->stats.lock, flags);
+ mutex_unlock(&i915_engine_stats_mutex);
return -EBUSY;
}
@@ -1696,6 +1709,7 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine)
if (!i915_modparams.enable_execlists)
return;
+ mutex_lock(&i915_engine_stats_mutex);
spin_lock_irqsave(&engine->stats.lock, flags);
WARN_ON_ONCE(engine->stats.enabled == 0);
if (--engine->stats.enabled == 0) {
@@ -1705,6 +1719,9 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine)
engine->stats.total = 0;
}
spin_unlock_irqrestore(&engine->stats.lock, flags);
+ if (--i915_engine_stats_ref == 0)
+ static_branch_disable(&i915_engine_stats_key);
+ mutex_unlock(&i915_engine_stats_mutex);
}
/**
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index e61ab4f3dc90..adcc79d2e414 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -361,6 +361,22 @@ struct intel_engine_cs {
* requested.
*/
bool busy_stats;
+ /**
+ * @enable_busy_stats: Work item for engine busy stats enabling.
+ *
+ * Since the action can sleep it needs to be decoupled from the
+ * perf API callback.
+ */
+ struct work_struct enable_busy_stats;
+ /**
+ * @disable_busy_stats: Work item for busy stats disabling.
+ *
+ * Same as with @enable_busy_stats action, with the difference
+ * that we delay it in case there are rapid enable-disable
+ * actions, which can happen during tool startup (like perf
+ * stat).
+ */
+ struct delayed_work disable_busy_stats;
} pmu;
/*
@@ -902,59 +918,68 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine);
struct intel_engine_cs *
intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
+DECLARE_STATIC_KEY_FALSE(i915_engine_stats_key);
+
static inline void intel_engine_context_in(struct intel_engine_cs *engine)
{
unsigned long flags;
- if (READ_ONCE(engine->stats.enabled) == 0)
- return;
+ if (static_branch_unlikely(&i915_engine_stats_key)) {
+ if (READ_ONCE(engine->stats.enabled) == 0)
+ return;
- spin_lock_irqsave(&engine->stats.lock, flags);
+ spin_lock_irqsave(&engine->stats.lock, flags);
- if (engine->stats.enabled > 0) {
- if (engine->stats.active++ == 0)
- engine->stats.start = ktime_get();
- GEM_BUG_ON(engine->stats.active == 0);
- }
+ if (engine->stats.enabled > 0) {
+ if (engine->stats.active++ == 0)
+ engine->stats.start = ktime_get();
+ GEM_BUG_ON(engine->stats.active == 0);
+ }
- spin_unlock_irqrestore(&engine->stats.lock, flags);
+ spin_unlock_irqrestore(&engine->stats.lock, flags);
+ }
}
static inline void intel_engine_context_out(struct intel_engine_cs *engine)
{
unsigned long flags;
- if (READ_ONCE(engine->stats.enabled) == 0)
- return;
-
- spin_lock_irqsave(&engine->stats.lock, flags);
-
- if (engine->stats.enabled > 0) {
- ktime_t last, now = ktime_get();
-
- if (engine->stats.active && --engine->stats.active == 0) {
- /*
- * Decrement the active context count and in case GPU
- * is now idle add up to the running total.
- */
- last = ktime_sub(now, engine->stats.start);
-
- engine->stats.total = ktime_add(engine->stats.total,
- last);
- } else if (engine->stats.active == 0) {
- /*
- * After turning on engine stats, context out might be
- * the first event in which case we account from the
- * time stats gathering was turned on.
- */
- last = ktime_sub(now, engine->stats.enabled_at);
-
- engine->stats.total = ktime_add(engine->stats.total,
- last);
+ if (static_branch_unlikely(&i915_engine_stats_key)) {
+ if (READ_ONCE(engine->stats.enabled) == 0)
+ return;
+
+ spin_lock_irqsave(&engine->stats.lock, flags);
+
+ if (engine->stats.enabled > 0) {
+ ktime_t last, now = ktime_get();
+
+ if (engine->stats.active &&
+ --engine->stats.active == 0) {
+ /*
+ * Decrement the active context count and in
+ * case GPU is now idle add up to the running
+ * total.
+ */
+ last = ktime_sub(now, engine->stats.start);
+
+ engine->stats.total =
+ ktime_add(engine->stats.total, last);
+ } else if (engine->stats.active == 0) {
+ /*
+ * After turning on engine stats, context out
+ * might be the first event in which case we
+ * account from the time stats gathering was
+ * turned on.
+ */
+ last = ktime_sub(now, engine->stats.enabled_at);
+
+ engine->stats.total =
+ ktime_add(engine->stats.total, last);
+ }
}
- }
- spin_unlock_irqrestore(&engine->stats.lock, flags);
+ spin_unlock_irqrestore(&engine->stats.lock, flags);
+ }
}
int intel_enable_engine_stats(struct intel_engine_cs *engine);
--
2.9.5
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2017-10-05 13:05 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-09-29 12:34 [PATCH v6 00/10] i915 PMU and engine busy stats Tvrtko Ursulin
2017-09-29 12:34 ` [PATCH 01/10] drm/i915: Extract intel_get_cagf Tvrtko Ursulin
2017-09-29 12:34 ` [PATCH 02/10] drm/i915/pmu: Expose a PMU interface for perf queries Tvrtko Ursulin
2017-09-29 12:46 ` Chris Wilson
2017-10-05 13:04 ` [PATCH v13 " Tvrtko Ursulin
2017-09-29 12:34 ` [PATCH 03/10] drm/i915/pmu: Suspend sampling when GPU is idle Tvrtko Ursulin
2017-09-29 12:34 ` [PATCH 04/10] drm/i915: Wrap context schedule notification Tvrtko Ursulin
2017-09-29 12:34 ` [PATCH 05/10] drm/i915: Engine busy time tracking Tvrtko Ursulin
2017-09-29 12:47 ` Chris Wilson
2017-09-29 12:34 ` [PATCH 06/10] drm/i915/pmu: Wire up engine busy stats to PMU Tvrtko Ursulin
2017-10-04 17:35 ` Tvrtko Ursulin
2017-10-04 17:52 ` Rogozhkin, Dmitry V
2017-09-29 12:34 ` [PATCH 07/10] drm/i915: Gate engine stats collection with a static key Tvrtko Ursulin
2017-10-03 10:17 ` Chris Wilson
2017-10-04 17:38 ` Tvrtko Ursulin
2017-10-04 17:49 ` Chris Wilson
2017-10-05 7:07 ` Tvrtko Ursulin
2017-10-05 9:04 ` Chris Wilson
2017-10-05 13:05 ` Tvrtko Ursulin [this message]
2017-09-29 12:34 ` [PATCH 08/10] drm/i915/pmu: Add interrupt count metric Tvrtko Ursulin
2017-09-29 12:53 ` Chris Wilson
2017-09-29 12:34 ` [PATCH 09/10] drm/i915: Convert intel_rc6_residency_us to ns Tvrtko Ursulin
2017-09-29 12:35 ` [PATCH 10/10] drm/i915/pmu: Add RC6 residency metrics Tvrtko Ursulin
2017-09-29 12:54 ` Chris Wilson
2017-09-29 13:29 ` ✗ Fi.CI.BAT: failure for i915 PMU and engine busy stats (rev14) Patchwork
2017-10-06 8:23 ` ✗ Fi.CI.BAT: failure for i915 PMU and engine busy stats (rev16) Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171005130526.28624-1-tvrtko.ursulin@linux.intel.com \
--to=tursulin@ursulin.net \
--cc=Intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.