From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, Intel-gfx@lists.freedesktop.org
Subject: Re: [Intel-gfx] [RFC 5/8] drm/i915: Track per-context engine busyness
Date: Fri, 20 Dec 2019 07:58:52 +0000 [thread overview]
Message-ID: <e21bb01f-6b01-c988-2531-fde3d46eec07@linux.intel.com> (raw)
In-Reply-To: <157678871488.6469.7493567337402690504@skylake-alporthouse-com>
On 19/12/2019 20:51, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-12-19 18:00:16)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Some customers want to know how much of the GPU time are their clients
>> using in order to make dynamic load balancing decisions.
>>
>> With the hooks already in place which track the overall engine busyness,
>> we can extend that slightly to split that time between contexts.
>>
>> v2: Fix accounting for tail updates.
>> v3: Rebase.
>> v4: Mark currently running contexts as active on stats enable.
>> v5: Include some headers to fix the build.
>> v6: Added fine grained lock.
>> v7: Convert to seqlock. (Chris Wilson)
>> v8: Rebase and tidy with helpers.
>> v9: Refactor.
>> v10: Move recording start to promotion. (Chris)
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>> drivers/gpu/drm/i915/gt/intel_context.c | 20 +++++++
>> drivers/gpu/drm/i915/gt/intel_context.h | 11 ++++
>> drivers/gpu/drm/i915/gt/intel_context_types.h | 9 ++++
>> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 16 +++++-
>> drivers/gpu/drm/i915/gt/intel_lrc.c | 52 ++++++++++++++++---
>> 5 files changed, 100 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
>> index b1e346d2d35f..b211b48d6cae 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_context.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
>> @@ -243,6 +243,7 @@ intel_context_init(struct intel_context *ce,
>> INIT_LIST_HEAD(&ce->signals);
>>
>> mutex_init(&ce->pin_mutex);
>> + seqlock_init(&ce->stats.lock);
>>
>> i915_active_init(&ce->active,
>> __intel_context_active, __intel_context_retire);
>> @@ -337,6 +338,25 @@ struct i915_request *intel_context_create_request(struct intel_context *ce)
>> return rq;
>> }
>>
>> +ktime_t intel_context_get_busy_time(struct intel_context *ce)
>> +{
>> + unsigned int seq;
>> + ktime_t total;
>> +
>> + do {
>> + seq = read_seqbegin(&ce->stats.lock);
>> +
>> + total = ce->stats.total;
>> +
>> + if (ce->stats.active)
>> + total = ktime_add(total,
>> + ktime_sub(ktime_get(),
>> + ce->stats.start));
>> + } while (read_seqretry(&ce->stats.lock, seq));
>> +
>> + return total;
>> +}
>> +
>> #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>> #include "selftest_context.c"
>> #endif
>> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
>> index b39eb1fcfbca..3a15cf32f0a3 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_context.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
>> @@ -160,4 +160,15 @@ static inline struct intel_ring *__intel_context_ring_size(u64 sz)
>> return u64_to_ptr(struct intel_ring, sz);
>> }
>>
>> +static inline void
>> +__intel_context_stats_start(struct intel_context_stats *stats, ktime_t now)
>> +{
>> + if (!stats->active) {
>> + stats->start = now;
>> + stats->active = true;
>> + }
>> +}
>> +
>> +ktime_t intel_context_get_busy_time(struct intel_context *ce);
>> +
>> #endif /* __INTEL_CONTEXT_H__ */
>> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
>> index d1204cc899a3..12cbad0798cb 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
>> @@ -11,6 +11,7 @@
>> #include <linux/list.h>
>> #include <linux/mutex.h>
>> #include <linux/types.h>
>> +#include <linux/seqlock.h>
>>
>> #include "i915_active_types.h"
>> #include "i915_utils.h"
>> @@ -76,6 +77,14 @@ struct intel_context {
>>
>> /** sseu: Control eu/slice partitioning */
>> struct intel_sseu sseu;
>> +
>> + /** stats: Context GPU engine busyness tracking. */
>> + struct intel_context_stats {
>> + seqlock_t lock;
>> + bool active;
>> + ktime_t start;
>> + ktime_t total;
>> + } stats;
>> };
>>
>> #endif /* __INTEL_CONTEXT_TYPES__ */
>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> index 3d1d48bf90cf..ac08781c8b24 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> @@ -1577,8 +1577,20 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
>>
>> engine->stats.enabled_at = ktime_get();
>>
>> - /* XXX submission method oblivious? */
>> - for (port = execlists->active; (rq = *port); port++)
>> + /*
>> + * Mark currently running context as active.
>> + * XXX submission method oblivious?
>> + */
>> +
>> + rq = NULL;
>> + port = execlists->active;
>> + if (port)
>> + rq = *port;
>> + if (rq)
>> + __intel_context_stats_start(&rq->hw_context->stats,
>> + engine->stats.enabled_at);
>> +
>> + for (; (rq = *port); port++)
>> engine->stats.active++;
>>
>> for (port = execlists->pending; (rq = *port); port++) {
>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> index 4db54fd6a2fe..b186f06e508d 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> @@ -940,6 +940,7 @@ static void intel_engine_context_in(struct intel_engine_cs *engine)
>> if (engine->stats.enabled > 0) {
>> if (engine->stats.active++ == 0)
>> engine->stats.start = ktime_get();
>> +
>> GEM_BUG_ON(engine->stats.active == 0);
>> }
>>
>> @@ -1088,6 +1089,32 @@ static void reset_active(struct i915_request *rq,
>> ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
>> }
>>
>> +static void
>> +intel_context_stats_start(struct intel_context_stats *stats)
>> +{
>> + unsigned long flags;
>> +
>> + write_seqlock_irqsave(&stats->lock, flags);
>> + __intel_context_stats_start(stats, ktime_get());
>> + write_sequnlock_irqrestore(&stats->lock, flags);
>> +}
>> +
>> +static void
>> +intel_context_stats_stop(struct intel_context_stats *stats)
>> +{
>> + unsigned long flags;
>> +
>> + if (!READ_ONCE(stats->active))
>> + return;
>> +
>> + write_seqlock_irqsave(&stats->lock, flags);
>> + GEM_BUG_ON(!READ_ONCE(stats->active));
>> + stats->total = ktime_add(stats->total,
>> + ktime_sub(ktime_get(), stats->start));
>> + stats->active = false;
>> + write_sequnlock_irqrestore(&stats->lock, flags);
>> +}
>> +
>> static inline struct intel_engine_cs *
>> __execlists_schedule_in(struct i915_request *rq)
>> {
>> @@ -1155,7 +1182,7 @@ static inline void
>> __execlists_schedule_out(struct i915_request *rq,
>> struct intel_engine_cs * const engine)
>> {
>> - struct intel_context * const ce = rq->hw_context;
>> + struct intel_context *ce = rq->hw_context;
>>
>> /*
>> * NB process_csb() is not under the engine->active.lock and hence
>> @@ -1172,6 +1199,7 @@ __execlists_schedule_out(struct i915_request *rq,
>> intel_engine_add_retire(engine, ce->timeline);
>>
>> intel_engine_context_out(engine);
>> + intel_context_stats_stop(&ce->stats);
>> execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
>> intel_gt_pm_put_async(engine->gt);
>>
>> @@ -2174,9 +2202,11 @@ static void process_csb(struct intel_engine_cs *engine)
>> promote = gen8_csb_parse(execlists, buf + 2 * head);
>> if (promote) {
>> struct i915_request * const *old = execlists->active;
>> + struct i915_request *rq;
>>
>> /* Point active to the new ELSP; prevent overwriting */
>> WRITE_ONCE(execlists->active, execlists->pending);
>> +
>> set_timeslice(engine);
>>
>> if (!inject_preempt_hang(execlists))
>> @@ -2196,8 +2226,16 @@ static void process_csb(struct intel_engine_cs *engine)
>> sizeof(*execlists->pending)));
>>
>> WRITE_ONCE(execlists->pending[0], NULL);
>> +
>> + rq = *execlists->active;
>> + if (rq)
>> + intel_context_stats_start(&rq->hw_context->stats);
>
> Code duplication :) Both branches are doing
>
> rq = *execlists->active;
> if (rq)
> intel_context_stats_start(&rq->hw_context->stats);
>
> as their final step, just move it to after the if.
True, thanks!
> Cost of maintaining stats was unnoticeable in profiles and did not
> affect wsim, so lgtm.
Yes, it did not stand out to me either.
I tried the opposite test as well, running intel_gpu_top with a 100Hz
update rate and that also wasn't that bad.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2019-12-20 7:58 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-12-19 18:00 [Intel-gfx] [RFC 0/8] Per client engine busyness Tvrtko Ursulin
2019-12-19 18:00 ` [Intel-gfx] [RFC 1/8] drm/i915: Switch context id allocation directoy to xarray Tvrtko Ursulin
2019-12-19 19:55 ` Chris Wilson
2019-12-19 18:00 ` [Intel-gfx] [RFC 2/8] drm/i915: Reference count struct drm_i915_file_private Tvrtko Ursulin
2019-12-19 20:43 ` Chris Wilson
2019-12-20 7:55 ` Tvrtko Ursulin
2019-12-19 18:00 ` [Intel-gfx] [RFC 3/8] drm/i915: Expose list of clients in sysfs Tvrtko Ursulin
2019-12-19 20:48 ` Chris Wilson
2019-12-20 7:56 ` Tvrtko Ursulin
2019-12-20 10:08 ` Chris Wilson
2019-12-19 18:00 ` [Intel-gfx] [RFC 4/8] drm/i915: Update client name on context create Tvrtko Ursulin
2019-12-19 18:00 ` [Intel-gfx] [RFC 5/8] drm/i915: Track per-context engine busyness Tvrtko Ursulin
2019-12-19 20:51 ` Chris Wilson
2019-12-20 7:58 ` Tvrtko Ursulin [this message]
2019-12-19 18:00 ` [Intel-gfx] [RFC 6/8] drm/i915: Track all user contexts per client Tvrtko Ursulin
2019-12-19 18:00 ` [Intel-gfx] [RFC 7/8] drm/i915: Contexts can use struct pid stored in the client Tvrtko Ursulin
2019-12-19 20:57 ` Chris Wilson
2019-12-20 8:00 ` Tvrtko Ursulin
2019-12-19 18:00 ` [Intel-gfx] [RFC 8/8] drm/i915: Expose per-engine client busyness Tvrtko Ursulin
2019-12-19 21:04 ` Chris Wilson
2019-12-20 8:07 ` Tvrtko Ursulin
2019-12-19 21:23 ` Chris Wilson
2019-12-19 23:16 ` Chris Wilson
2019-12-19 18:41 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for Per client engine busyness (rev2) Patchwork
2019-12-19 19:17 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=e21bb01f-6b01-c988-2531-fde3d46eec07@linux.intel.com \
--to=tvrtko.ursulin@linux.intel.com \
--cc=Intel-gfx@lists.freedesktop.org \
--cc=chris@chris-wilson.co.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox