All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS
Date: Mon, 20 Apr 2020 15:54:21 +0300	[thread overview]
Message-ID: <87r1wimi6a.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <158738057131.19285.14967910014530902851@build.alporthouse.com>

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2020-04-20 11:54:38)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > One of the core tenets of reclocking the GPU is that its throughput
>> > scales with the clock frequency. We can observe this by incrementing a
>> > loop counter on the GPU, and comparing the different execution rates at
>> > the notional RPS frequencies.
>> >
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > ---
>> >  drivers/gpu/drm/i915/gt/selftest_gt_pm.c |   3 +-
>> >  drivers/gpu/drm/i915/gt/selftest_rps.c   | 249 +++++++++++++++++++++--
>> >  drivers/gpu/drm/i915/gt/selftest_rps.h   |   1 +
>> >  3 files changed, 240 insertions(+), 13 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
>> > index 0141c334f2ac..4b2733967c42 100644
>> > --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
>> > +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
>> > @@ -53,8 +53,9 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
>> >  {
>> >       static const struct i915_subtest tests[] = {
>> >               SUBTEST(live_rc6_manual),
>> > -             SUBTEST(live_rps_interrupt),
>> > +             SUBTEST(live_rps_frequency),
>> >               SUBTEST(live_rps_power),
>> > +             SUBTEST(live_rps_interrupt),
>> >               SUBTEST(live_gt_resume),
>> >       };
>> >  
>> > diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
>> > index 360f56aa4b82..b1a435db1edc 100644
>> > --- a/drivers/gpu/drm/i915/gt/selftest_rps.c
>> > +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
>> > @@ -6,6 +6,7 @@
>> >  #include <linux/sort.h>
>> >  
>> >  #include "intel_engine_pm.h"
>> > +#include "intel_gpu_commands.h"
>> >  #include "intel_gt_pm.h"
>> >  #include "intel_rc6.h"
>> >  #include "selftest_rps.h"
>> > @@ -17,6 +18,242 @@ static void dummy_rps_work(struct work_struct *wrk)
>> >  {
>> >  }
>> >  
>> > +static int cmp_u64(const void *A, const void *B)
>> > +{
>> > +     const u64 *a = A, *b = B;
>> > +
>> > +     if (a < b)
>> > +             return -1;
>> > +     else if (a > b)
>> > +             return 1;
>> > +     else
>> > +             return 0;
>> > +}
>> > +
>> > +static struct i915_vma *
>> > +create_spin_counter(struct intel_engine_cs *engine,
>> > +                 struct i915_address_space *vm,
>> > +                 u32 **cancel,
>> > +                 u32 **counter)
>> > +{
>> > +     enum {
>> > +             COUNT,
>> 
>> ok, it starts from zero.
>> 
>> > +             INC,
>> > +             __NGPR__,
>> > +     };
>> > +#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
>> > +     struct drm_i915_gem_object *obj;
>> > +     struct i915_vma *vma;
>> > +     u32 *base, *cs;
>> > +     int loop, i;
>> > +     int err;
>> > +
>> > +     obj = i915_gem_object_create_internal(vm->i915, 4096);
>> > +     if (IS_ERR(obj))
>> > +             return ERR_CAST(obj);
>> > +
>> > +     vma = i915_vma_instance(obj, vm, NULL);
>> > +     if (IS_ERR(vma)) {
>> > +             i915_gem_object_put(obj);
>> > +             return vma;
>> > +     }
>> > +
>> > +     err = i915_vma_pin(vma, 0, 0, PIN_USER);
>> > +     if (err) {
>> > +             i915_vma_put(vma);
>> 
>> You forgot to put the obj.
>
> The i915_vma_put() is i915_gem_object_put().
>
> I know, I am in for a reckoning when I have to fix all the allocations
> for i915_vma.kref being independent of the object.
>
>> 
>> > +             return ERR_PTR(err);
>> > +     }
>> > +
>> > +     base = i915_gem_object_pin_map(obj, I915_MAP_WC);
>> > +     if (IS_ERR(base)) {
>> > +             i915_gem_object_put(obj);
>> 
>> You forgot to put the vma?
>
> One and the same :)
>
>> 
>> > +             return ERR_CAST(base);
>> > +     }
>> > +     cs = base;
>> > +
>> > +     *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
>> > +     for (i = 0; i < __NGPR__; i++) {
>> > +             *cs++ = i915_mmio_reg_offset(CS_GPR(i));
>> > +             *cs++ = 0;
>> > +             *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
>> > +             *cs++ = 0;
>> > +     }
>> > +
>> > +     *cs++ = MI_LOAD_REGISTER_IMM(1);
>> > +     *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
>> > +     *cs++ = 1;
>> > +
>> > +     loop = cs - base;
>> > +
>> > +     *cs++ = MI_MATH(4);
>> > +     *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
>> > +     *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
>> > +     *cs++ = MI_MATH_ADD;
>> > +     *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
>> > +
>> > +     *cs++ = MI_STORE_REGISTER_MEM_GEN8;
>> > +     *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
>> > +     *cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
>> > +     *cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
>> > +
>> > +     *cs++ = MI_BATCH_BUFFER_START_GEN8;
>> > +     *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
>> > +     *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
>> > +
>> > +     i915_gem_object_flush_map(obj);
>> > +
>> > +     *cancel = base + loop;
>> > +     *counter = memset32(base + 1000, 0, 1);
>> > +     return vma;
>> > +}
>> > +
>> > +static u64 __measure_frequency(u32 *cntr, int duration_ms)
>> > +{
>> > +     u64 dc, dt;
>> > +
>> > +     dt = ktime_get();
>> > +     dc = READ_ONCE(*cntr);
>> > +     usleep_range(1000 * duration_ms, 2000 * duration_ms);
>> > +     dc = READ_ONCE(*cntr) - dc;
>> > +     dt = ktime_get() - dt;
>> > +
>> > +     return div64_u64(1000 * 1000 * dc, dt);
>> > +}
>> > +
>> > +static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
>> > +{
>> > +     u64 x[5];
>> > +     int i;
>> > +
>> > +     mutex_lock(&rps->lock);
>> > +     GEM_BUG_ON(!rps->active);
>> > +     intel_rps_set(rps, *freq);
>> > +     mutex_unlock(&rps->lock);
>> > +
>> > +     msleep(20); /* more than enough time to stabilise! */
>> > +
>> > +     for (i = 0; i < 5; i++)
>> > +             x[i] = __measure_frequency(cntr, 2);
>> > +     *freq = read_cagf(rps);
>> > +
>> > +     /* A simple triangle filter for better result stability */
>> > +     sort(x, 5, sizeof(*x), cmp_u64, NULL);
>> > +     return div_u64(x[1] + 2 * x[2] + x[3], 4);
>> > +}
>> > +
>> > +static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
>> > +{
>> > +     return f_d * x > f_n * y && f_n * x < f_d * y;
>> > +}
>> > +
>> > +int live_rps_frequency(void *arg)
>> > +{
>> > +     void (*saved_work)(struct work_struct *wrk);
>> > +     struct intel_gt *gt = arg;
>> > +     struct intel_rps *rps = &gt->rps;
>> > +     struct intel_engine_cs *engine;
>> > +     enum intel_engine_id id;
>> > +     int err = 0;
>> > +
>> > +     /*
>> > +      * The premise is that the GPU does change frequency at our behest.
>> > +      * Let's check there is a correspondence between the requested
>> > +      * frequency, the actual frequency, and the observed clock rate.
>> > +      */
>> > +
>> > +     if (!rps->enabled || rps->max_freq <= rps->min_freq)
>> > +             return 0;
>> > +
>> > +     if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
>> > +             return 0;
>> > +
>> > +     intel_gt_pm_wait_for_idle(gt);
>> > +     saved_work = rps->work.func;
>> > +     rps->work.func = dummy_rps_work;
>> > +
>> > +     for_each_engine(engine, gt, id) {
>> > +             struct i915_request *rq;
>> > +             struct i915_vma *vma;
>> > +             u32 *cancel, *cntr;
>> > +             struct {
>> > +                     u64 count;
>> > +                     int freq;
>> > +             } min, max;
>> > +
>> > +             vma = create_spin_counter(engine,
>> > +                                       engine->kernel_context->vm,
>> > +                                       &cancel, &cntr);
>> > +             if (IS_ERR(vma)) {
>> > +                     err = PTR_ERR(vma);
>> > +                     break;
>> > +             }
>> > +
>> > +             rq = intel_engine_create_kernel_request(engine);
>> > +             if (IS_ERR(rq)) {
>> > +                     err = PTR_ERR(rq);
>> > +                     goto err_vma;
>> > +             }
>> > +
>> > +             i915_vma_lock(vma);
>> > +             err = i915_request_await_object(rq, vma->obj, false);
>> 
>> I am puzzled about what we need to wait for asynchronously here.
>
> To bind the vma, mostly. Yes that is now hidden away by
> i915_vma_move_to_active(), but we established the pattern to always add
> the waits even if we expect them to be no-ops -- because it's a hard
> task to find a missing one later.
>  
>> Further, intel_runtime_pm_get is missing.
>
> For what? We acquire the wakeref via the request on the engine.
>
> We don't talk to intel_runtime_pm directly, everything we should be
> doing is engine specific, which knows which gt and the power management
> for that.

I was worried about the read_cagf().

But as it is implied that the request will be running
and we have total control of it while reading the cagf,
it will work like this.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>



> -Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2020-04-20 12:56 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-20  9:09 [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS Chris Wilson
2020-04-20  9:09 ` [Intel-gfx] [PATCH 2/4] drm/i915/selftests: Skip energy consumption tests if not controlling freq Chris Wilson
2020-04-20 11:05   ` Mika Kuoppala
2020-04-20  9:09 ` [Intel-gfx] [PATCH 3/4] drm/i915/selftests: Check RPS controls Chris Wilson
2020-04-20 16:41   ` Mika Kuoppala
2020-04-20  9:09 ` [Intel-gfx] [PATCH 4/4] drm/i915/selftests: Split RPS frequency measurement Chris Wilson
2020-04-20 16:51   ` Mika Kuoppala
2020-04-20 10:00 ` [Intel-gfx] ✗ Fi.CI.BAT: failure for series starting with [1/4] drm/i915/selftests: Verify frequency scaling with RPS Patchwork
2020-04-20 10:54 ` [Intel-gfx] [PATCH 1/4] " Mika Kuoppala
2020-04-20 11:02   ` Chris Wilson
2020-04-20 12:54     ` Mika Kuoppala [this message]
2020-04-20 13:08       ` Chris Wilson
2020-04-20 11:12 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/4] " Patchwork
2020-04-20 15:54 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87r1wimi6a.fsf@gaia.fi.intel.com \
    --to=mika.kuoppala@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.