From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Subject: Re: [Intel-gfx] [PATCH 4/4] drm/i915/selftests: Split RPS frequency measurement
Date: Mon, 20 Apr 2020 19:51:09 +0300 [thread overview]
Message-ID: <87lfmqm77m.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <20200420090914.14679-4-chris@chris-wilson.co.uk>
Chris Wilson <chris@chris-wilson.co.uk> writes:
> Split the frequency measurement into two modes, so that we can judge the
> impact of the llc setup on top of the pure CS frequency scaling.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/gt/selftest_gt_pm.c | 3 +-
> drivers/gpu/drm/i915/gt/selftest_rps.c | 157 ++++++++++++++++++++++-
> drivers/gpu/drm/i915/gt/selftest_rps.h | 3 +-
> 3 files changed, 154 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> index de3eaef40596..9855e6f0ce7c 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
> @@ -54,7 +54,8 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
> static const struct i915_subtest tests[] = {
> SUBTEST(live_rc6_manual),
> SUBTEST(live_rps_control),
> - SUBTEST(live_rps_frequency),
> + SUBTEST(live_rps_frequency_cs),
> + SUBTEST(live_rps_frequency_srm),
> SUBTEST(live_rps_power),
> SUBTEST(live_rps_interrupt),
> SUBTEST(live_gt_resume),
> diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
> index 19fa6a561de3..dbca673519a2 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_rps.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
> @@ -33,6 +33,7 @@ static int cmp_u64(const void *A, const void *B)
> static struct i915_vma *
> create_spin_counter(struct intel_engine_cs *engine,
> struct i915_address_space *vm,
> + bool srm,
> u32 **cancel,
> u32 **counter)
> {
> @@ -91,10 +92,12 @@ create_spin_counter(struct intel_engine_cs *engine,
> *cs++ = MI_MATH_ADD;
> *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
>
> - *cs++ = MI_STORE_REGISTER_MEM_GEN8;
> - *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
> - *cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
> - *cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
> + if (srm) {
> + *cs++ = MI_STORE_REGISTER_MEM_GEN8;
> + *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
> + *cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
> + *cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
> + }
>
> *cs++ = MI_BATCH_BUFFER_START_GEN8;
> *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
> @@ -103,7 +106,7 @@ create_spin_counter(struct intel_engine_cs *engine,
> i915_gem_object_flush_map(obj);
>
> *cancel = base + loop;
> - *counter = memset32(base + 1000, 0, 1);
> + *counter = srm ? memset32(base + 1000, 0, 1) : NULL;
Ok hmm you want a blowout on wrong usage. Fair enough.
> return vma;
> }
>
> @@ -301,12 +304,152 @@ static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
> return div_u64(x[1] + 2 * x[2] + x[3], 4);
> }
>
> +static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
> + int duration_ms)
> +{
> + u64 dc, dt;
> +
> + dt = ktime_get();
> + dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
> + usleep_range(1000 * duration_ms, 2000 * duration_ms);
> + dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
Ok, again you measure active engine and get away with this.
On the basis that you want minimal impact on the observed
engine,
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> + dt = ktime_get() - dt;
> +
> + return div64_u64(1000 * 1000 * dc, dt);
> +}
> +
> +static u64 measure_cs_frequency_at(struct intel_rps *rps,
> + struct intel_engine_cs *engine,
> + int *freq)
> +{
> + u64 x[5];
> + int i;
> +
> + *freq = rps_set_check(rps, *freq);
> + for (i = 0; i < 5; i++)
> + x[i] = __measure_cs_frequency(engine, 2);
> + *freq = (*freq + read_cagf(rps)) / 2;
> +
> + /* A simple triangle filter for better result stability */
> + sort(x, 5, sizeof(*x), cmp_u64, NULL);
> + return div_u64(x[1] + 2 * x[2] + x[3], 4);
> +}
> +
> static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
> {
> return f_d * x > f_n * y && f_n * x < f_d * y;
> }
>
> -int live_rps_frequency(void *arg)
> +int live_rps_frequency_cs(void *arg)
> +{
> + void (*saved_work)(struct work_struct *wrk);
> + struct intel_gt *gt = arg;
> + struct intel_rps *rps = &gt->rps;
> + struct intel_engine_cs *engine;
> + enum intel_engine_id id;
> + int err = 0;
> +
> + /*
> + * The premise is that the GPU does change freqency at our behest.
> + * Let's check there is a correspondence between the requested
> + * frequency, the actual frequency, and the observed clock rate.
> + */
> +
> + if (!rps->enabled || rps->max_freq <= rps->min_freq)
> + return 0;
> +
> + if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
> + return 0;
> +
> + intel_gt_pm_wait_for_idle(gt);
> + saved_work = rps->work.func;
> + rps->work.func = dummy_rps_work;
> +
> + for_each_engine(engine, gt, id) {
> + struct i915_request *rq;
> + struct i915_vma *vma;
> + u32 *cancel, *cntr;
> + struct {
> + u64 count;
> + int freq;
> + } min, max;
> +
> + vma = create_spin_counter(engine,
> + engine->kernel_context->vm, false,
> + &cancel, &cntr);
> + if (IS_ERR(vma)) {
> + err = PTR_ERR(vma);
> + break;
> + }
> +
> + rq = intel_engine_create_kernel_request(engine);
> + if (IS_ERR(rq)) {
> + err = PTR_ERR(rq);
> + goto err_vma;
> + }
> +
> + i915_vma_lock(vma);
> + err = i915_request_await_object(rq, vma->obj, false);
> + if (!err)
> + err = i915_vma_move_to_active(vma, rq, 0);
> + if (!err)
> + err = rq->engine->emit_bb_start(rq,
> + vma->node.start,
> + PAGE_SIZE, 0);
> + i915_vma_unlock(vma);
> + i915_request_add(rq);
> + if (err)
> + goto err_vma;
> +
> + if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
> + 10)) {
> + pr_err("%s: timed loop did not start\n",
> + engine->name);
> + goto err_vma;
> + }
> +
> + min.freq = rps->min_freq;
> + min.count = measure_cs_frequency_at(rps, engine, &min.freq);
> +
> + max.freq = rps->max_freq;
> + max.count = measure_cs_frequency_at(rps, engine, &max.freq);
> +
> + pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
> + engine->name,
> + min.count, intel_gpu_freq(rps, min.freq),
> + max.count, intel_gpu_freq(rps, max.freq),
> + (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
> + max.freq * min.count));
> +
> + if (!scaled_within(max.freq * min.count,
> + min.freq * max.count,
> + 2, 3)) {
> + pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
> + engine->name,
> + max.freq * min.count,
> + min.freq * max.count);
> + err = -EINVAL;
> + }
> +
> +err_vma:
> + *cancel = MI_BATCH_BUFFER_END;
> + i915_gem_object_unpin_map(vma->obj);
> + i915_vma_unpin(vma);
> + i915_vma_put(vma);
> +
> + if (igt_flush_test(gt->i915))
> + err = -EIO;
> + if (err)
> + break;
> + }
> +
> + intel_gt_pm_wait_for_idle(gt);
> + rps->work.func = saved_work;
> +
> + return err;
> +}
> +
> +int live_rps_frequency_srm(void *arg)
> {
> void (*saved_work)(struct work_struct *wrk);
> struct intel_gt *gt = arg;
> @@ -341,7 +484,7 @@ int live_rps_frequency(void *arg)
> } min, max;
>
> vma = create_spin_counter(engine,
> - engine->kernel_context->vm,
> + engine->kernel_context->vm, true,
> &cancel, &cntr);
> if (IS_ERR(vma)) {
> err = PTR_ERR(vma);
> diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.h b/drivers/gpu/drm/i915/gt/selftest_rps.h
> index be0bf8e3f639..22e46c5341c5 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_rps.h
> +++ b/drivers/gpu/drm/i915/gt/selftest_rps.h
> @@ -7,7 +7,8 @@
> #define SELFTEST_RPS_H
>
> int live_rps_control(void *arg);
> -int live_rps_frequency(void *arg);
> +int live_rps_frequency_cs(void *arg);
> +int live_rps_frequency_srm(void *arg);
> int live_rps_interrupt(void *arg);
> int live_rps_power(void *arg);
>
> --
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2020-04-20 16:53 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-04-20 9:09 [Intel-gfx] [PATCH 1/4] drm/i915/selftests: Verify frequency scaling with RPS Chris Wilson
2020-04-20 9:09 ` [Intel-gfx] [PATCH 2/4] drm/i915/selftests: Skip energy consumption tests if not controlling freq Chris Wilson
2020-04-20 11:05 ` Mika Kuoppala
2020-04-20 9:09 ` [Intel-gfx] [PATCH 3/4] drm/i915/selftests: Check RPS controls Chris Wilson
2020-04-20 16:41 ` Mika Kuoppala
2020-04-20 9:09 ` [Intel-gfx] [PATCH 4/4] drm/i915/selftests: Split RPS frequency measurement Chris Wilson
2020-04-20 16:51 ` Mika Kuoppala [this message]
2020-04-20 10:00 ` [Intel-gfx] ✗ Fi.CI.BAT: failure for series starting with [1/4] drm/i915/selftests: Verify frequency scaling with RPS Patchwork
2020-04-20 10:54 ` [Intel-gfx] [PATCH 1/4] " Mika Kuoppala
2020-04-20 11:02 ` Chris Wilson
2020-04-20 12:54 ` Mika Kuoppala
2020-04-20 13:08 ` Chris Wilson
2020-04-20 11:12 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/4] " Patchwork
2020-04-20 15:54 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87lfmqm77m.fsf@gaia.fi.intel.com \
--to=mika.kuoppala@linux.intel.com \
--cc=chris@chris-wilson.co.uk \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.