All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>,
	Tvrtko Ursulin <tursulin@ursulin.net>,
	Intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH i-g-t 6/7] gem_wsim: Busy stats balancers
Date: Tue, 26 Sep 2017 12:27:33 +0100	[thread overview]
Message-ID: <9403cae0-28f3-84db-8050-e90fe90512a6@linux.intel.com> (raw)
In-Reply-To: <150635785474.18819.13865934398009234292@mail.alporthouse.com>


On 25/09/2017 17:44, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2017-09-25 16:15:01)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Add busy and busy-avg balancers which make balancing
>> decisions by looking at engine busyness via the i915 PMU.
> 
> "And thus are able to make decisions on the actual instantaneous load of
> the system, and not use metrics that lag behind by a batch or two. In
> doing so, each client should be able to greedily maximise their own
> usage of the system, leading to improved load balancing even in the face
> of other uncooperative clients. On the other hand, we are only using the
> instantaneous load without coupling in the predictive factor for
> dispatch and execution length."

Ok, thanks for the text.

> Hmm, do you not want to sum busy + queued? Or at least compare! :)

How to add apples and oranges? :) Queued * busy [0.0 - 1.0] ?

>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>>   benchmarks/gem_wsim.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 140 insertions(+)
>>
>> diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
>> index 82fe6ba9ec5f..9ee91fabb220 100644
>> --- a/benchmarks/gem_wsim.c
>> +++ b/benchmarks/gem_wsim.c
>> @@ -50,6 +50,7 @@
>>   #include "intel_io.h"
>>   #include "igt_aux.h"
>>   #include "igt_rand.h"
>> +#include "igt_perf.h"
>>   #include "sw_sync.h"
>>   
>>   #include "ewma.h"
>> @@ -188,6 +189,16 @@ struct workload
>>                          uint32_t last[NUM_ENGINES];
>>                  } rt;
>>          };
>> +
>> +       struct busy_balancer {
>> +               int fd;
>> +               bool first;
>> +               unsigned int num_engines;
>> +               unsigned int engine_map[5];
>> +               uint64_t t_prev;
>> +               uint64_t prev[5];
>> +               double busy[5];
>> +       } busy_balancer;
>>   };
>>   
>>   static const unsigned int nop_calibration_us = 1000;
>> @@ -993,6 +1004,8 @@ struct workload_balancer {
>>          unsigned int flags;
>>          unsigned int min_gen;
>>   
>> +       int (*init)(const struct workload_balancer *balancer,
>> +                   struct workload *wrk);
>>          unsigned int (*get_qd)(const struct workload_balancer *balancer,
>>                                 struct workload *wrk,
>>                                 enum intel_engine_id engine);
>> @@ -1242,6 +1255,106 @@ context_balance(const struct workload_balancer *balancer,
>>          return get_vcs_engine(wrk->ctx_list[w->context].static_vcs);
>>   }
>>   
>> +static unsigned int
>> +get_engine_busy(const struct workload_balancer *balancer,
>> +               struct workload *wrk, enum intel_engine_id engine)
>> +{
>> +       struct busy_balancer *bb = &wrk->busy_balancer;
>> +
>> +       if (engine == VCS2 && (wrk->flags & VCS2REMAP))
>> +               engine = BCS;
>> +
>> +       return bb->busy[bb->engine_map[engine]];
>> +}
>> +
>> +static void get_stats(const struct workload_balancer *b, struct workload *wrk)
> 
> s/get_stats/get_pmu_stats/ ?

Ok.

> 
>> +{
>> +       struct busy_balancer *bb = &wrk->busy_balancer;
>> +       uint64_t val[7];
>> +       unsigned int i;
>> +
>> +       igt_assert_eq(read(bb->fd, val, sizeof(val)), sizeof(val));
>> +
>> +       if (!bb->first) {
>> +               for (i = 0; i < bb->num_engines; i++) {
>> +                       double d;
>> +
>> +                       d = (val[2 + i] - bb->prev[i]) * 100;
>> +                       d /= val[1] - bb->t_prev;
>> +                       bb->busy[i] = d;
>> +               }
>> +       }
>> +
>> +       for (i = 0; i < bb->num_engines; i++)
>> +               bb->prev[i] = val[2 + i];
>> +
>> +       bb->t_prev = val[1];
>> +       bb->first = false;
> 
> Ok.

Although normalizing to [0.0 - 100.0] is only helpful if one wants to 
look at the numbers.

> 
>> +}
>> +
>> +static enum intel_engine_id
>> +busy_avg_balance(const struct workload_balancer *balancer,
>> +                struct workload *wrk, struct w_step *w)
>> +{
>> +       get_stats(balancer, wrk);
>> +
>> +       return qdavg_balance(balancer, wrk, w);
>> +}
>> +
>> +static enum intel_engine_id
>> +busy_balance(const struct workload_balancer *balancer,
>> +            struct workload *wrk, struct w_step *w)
>> +{
>> +       get_stats(balancer, wrk);
>> +
>> +       return qd_balance(balancer, wrk, w);
>> +}
>> +
>> +static int
>> +busy_init(const struct workload_balancer *balancer, struct workload *wrk)
>> +{
>> +       struct busy_balancer *bb = &wrk->busy_balancer;
>> +       struct engine_desc {
>> +               unsigned class, inst;
>> +               enum intel_engine_id id;
>> +       } *d, engines[] = {
>> +               { I915_ENGINE_CLASS_RENDER, 0, RCS },
>> +               { I915_ENGINE_CLASS_COPY, 0, BCS },
>> +               { I915_ENGINE_CLASS_VIDEO, 0, VCS1 },
>> +               { I915_ENGINE_CLASS_VIDEO, 1, VCS2 },
>> +               { I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, VECS },
>> +               { 0, 0, VCS }
>> +       };
>> +
>> +       bb->num_engines = 0;
>> +       bb->first = true;
>> +       bb->fd = -1;
>> +
>> +       for (d = &engines[0]; d->id != VCS; d++) {
>> +               int pfd;
>> +
>> +               pfd = perf_i915_open_group(I915_PMU_ENGINE_BUSY(d->class,
>> +                                                               d->inst),
>> +                                          bb->fd);
>> +               if (pfd < 0) {
>> +                       if (d->id != VCS2)
>> +                               return -(10 + bb->num_engines);
>> +                       else
>> +                               continue;
>> +               }
>> +
>> +               if (bb->num_engines == 0)
>> +                       bb->fd = pfd;
>> +
>> +               bb->engine_map[d->id] = bb->num_engines++;
>> +       }
>> +
>> +       if (bb->num_engines < 5 && !(wrk->flags & VCS2REMAP))
>> +               return -1;
> 
> Hmm, feels a little sloppy. Would be more concrete if we have a list of
> engines were going to use, and then we either created a perf event for
> each or died.

It is only a theoretical flaw, even though balancer init wo/ VCS2 would 
succeed and would be reporting invalid data for it, the tool would fail 
when trying to submit to VCS2 so no harm done.

> 
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

Thanks, I'll see if I will find the time to beautify it wrt/ the above 
at some point.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2017-09-26 11:27 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-09-25 15:14 [PATCH i-g-t v2 0/7] IGT PMU support Tvrtko Ursulin
2017-09-25 15:14 ` [PATCH i-g-t 1/7] intel-gpu-overlay: Move local perf implementation to a library Tvrtko Ursulin
2017-09-25 15:22   ` Chris Wilson
2017-09-26 10:52     ` Tvrtko Ursulin
2017-09-26 11:07       ` Chris Wilson
2017-09-25 15:14 ` [PATCH i-g-t 2/7] intel-gpu-overlay: Consolidate perf PMU access to library Tvrtko Ursulin
2017-09-25 15:25   ` Chris Wilson
2017-09-25 15:14 ` [PATCH i-g-t 3/7] intel-gpu-overlay: Fix interrupts PMU readout Tvrtko Ursulin
2017-09-25 15:14 ` [PATCH i-g-t 4/7] intel-gpu-overlay: Catch-up to new i915 PMU Tvrtko Ursulin
2017-09-25 15:31   ` Chris Wilson
2017-09-26 10:56     ` Tvrtko Ursulin
2017-09-26 11:13       ` Chris Wilson
2017-09-25 15:15 ` [PATCH i-g-t 5/7] tests/perf_pmu: Tests for i915 PMU API Tvrtko Ursulin
2017-09-25 16:21   ` Chris Wilson
2017-09-26 11:19     ` Tvrtko Ursulin
2017-09-26 11:42       ` Chris Wilson
2017-10-09 10:32         ` Tvrtko Ursulin
2017-10-09 10:55           ` Chris Wilson
2017-09-25 15:15 ` [PATCH i-g-t 6/7] gem_wsim: Busy stats balancers Tvrtko Ursulin
2017-09-25 16:44   ` Chris Wilson
2017-09-26 11:27     ` Tvrtko Ursulin [this message]
2017-09-26 11:48       ` Chris Wilson
2017-09-25 15:15 ` [PATCH i-g-t 7/7] media-bench.pl: Add busy balancers to the list Tvrtko Ursulin
2017-09-25 16:06 ` ✓ Fi.CI.BAT: success for IGT PMU support (rev4) Patchwork
2017-09-25 22:16 ` ✓ Fi.CI.IGT: " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2017-09-29 12:39 [PATCH v3 i-g-t 0/7] IGT PMU support Tvrtko Ursulin
2017-09-29 12:39 ` [PATCH i-g-t 6/7] gem_wsim: Busy stats balancers Tvrtko Ursulin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9403cae0-28f3-84db-8050-e90fe90512a6@linux.intel.com \
    --to=tvrtko.ursulin@linux.intel.com \
    --cc=Intel-gfx@lists.freedesktop.org \
    --cc=chris@chris-wilson.co.uk \
    --cc=tursulin@ursulin.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.