From: "Anirban, Sk" <sk.anirban@intel.com>
To: Riana Tauro <riana.tauro@intel.com>, <igt-dev@lists.freedesktop.org>
Subject: Re: [i-g-t, v3, 1/2] tests/i915/pm_rc6_residency: Use sysfs to achieve peak frequency
Date: Tue, 8 Apr 2025 21:02:10 +0530 [thread overview]
Message-ID: <ae8cd81e-e3a8-48c7-945b-590825090de7@intel.com> (raw)
In-Reply-To: <08b6bedc-99c2-434f-8e4a-01444d7b639d@intel.com>
Hi Riana,
On 20-03-2025 10:59, Riana Tauro wrote:
> Hi Anirban
>
> On 3/7/2025 12:29 AM, Sk Anirban wrote:
>> Leverage IGT spinner to trigger frequency scaling and
>> set RPS minimum frequency to RP0 for achieving peak frequency.
>> This method replaces the existing waitboost mechanism used
>> to reach maximum frequency.
> Can you specify the reason for the change? How is it helping the test?
Utilizing sysfs enables us to reach the maximum frequency swiftly,
ensuring rapid transitions to peak power for short durations.
This will be beneficial when handling multiple phases in the IGT test.
>
>>
>> v2:
>> - Disable power consumption check for dgfx
>>
>> v3:
>> - Implement igt exit handler
>>
>> Signed-off-by: Sk Anirban <sk.anirban@intel.com>
>> ---
>> tests/intel/i915_pm_rc6_residency.c | 127 +++++++++++++++++++---------
>> 1 file changed, 88 insertions(+), 39 deletions(-)
>>
>> diff --git a/tests/intel/i915_pm_rc6_residency.c
>> b/tests/intel/i915_pm_rc6_residency.c
>> index c9128481d..7d31e2cc0 100644
>> --- a/tests/intel/i915_pm_rc6_residency.c
>> +++ b/tests/intel/i915_pm_rc6_residency.c
>> @@ -264,17 +264,6 @@ static bool __pmu_wait_for_rc6(int fd)
>> return false;
>> }
>> -static uint32_t batch_create(int fd)
>> -{
>> - const uint32_t bbe = MI_BATCH_BUFFER_END;
>> - uint32_t handle;
>> -
>> - handle = gem_create(fd, 4096);
>> - gem_write(fd, handle, 0, &bbe, sizeof(bbe));
>> -
>> - return handle;
>> -}
>> -
>> static int open_pmu(int i915, uint64_t config)
>> {
>> int fd;
>> @@ -286,45 +275,85 @@ static int open_pmu(int i915, uint64_t config)
>> return fd;
>> }
>> -#define WAITBOOST 0x1
>> +#define FREQUENT_BOOST 0x1
> FREQUENT ?> #define ONCE 0x2
yes, as part of 3rd phase of the test we are boosting the freq to max
multiple times, so instead of wait-boosting I have updated the macro
name to related one.
>> static void sighandler(int sig)
>> {
>> }
>> -static void bg_load(int i915, uint32_t ctx_id, uint64_t
>> engine_flags, unsigned int flags, unsigned long *ctl)
>> +static uint32_t get_freq(int dirfd, uint8_t id)
>> +{
>> + uint32_t val;
>> +
>> + igt_assert(igt_sysfs_rps_scanf(dirfd, id, "%u", &val) == 1);
>> +
>> + return val;
>> +}
>> +
>> +static int set_freq(int dirfd, uint8_t id, uint32_t val)
>> +{
>> + return igt_sysfs_rps_printf(dirfd, id, "%u", val);
>> +}
>> +
>> +uint32_t *stash_min;
>> +
>> +static void restore_freq(int sig)
>> +{
>> + int i915 = -1;
>> + int gt_num;
>> + int dirfd;
>> +
>> + i915 = drm_open_driver(DRIVER_INTEL);
> Why reopen again? Doesn't it exist when exit handler is called?> +
> for_each_sysfs_gt_dirfd(i915, dirfd, gt_num) {
I will fix this.
>> + igt_assert_lt(0, set_freq(dirfd, RPS_MIN_FREQ_MHZ,
>> stash_min[gt_num]));
>> + }
>> + close(dirfd);
> unnecessary for_each_sysfs_gt_dirfd is already doing that> +}
I will remove this.
>> +
>> +static void bg_load(int i915, const intel_ctx_t *ctx, int dirfd,
>> uint64_t engine_flags,
>> + unsigned int flags, unsigned long *ctl, unsigned int gt)
>> {
>> const bool has_execlists = intel_gen(intel_get_drm_devid(i915))
>> >= 8;
>> - struct drm_i915_gem_exec_object2 obj = {
>> - .handle = batch_create(i915),
>> - };
>> - struct drm_i915_gem_execbuffer2 execbuf = {
>> - .buffers_ptr = to_user_pointer(&obj),
>> - .buffer_count = 1,
>> - .flags = engine_flags,
>> - .rsvd1 = ctx_id,
>> - };
>> struct sigaction act = {
>> .sa_handler = sighandler
>> };
>> + int64_t timeout = 1;
>> + uint64_t ahnd;
>> + int rp0;
>> + ahnd = get_reloc_ahnd(i915, ctx->id);
>> + rp0 = get_freq(dirfd, RPS_RP0_FREQ_MHZ);
>> sigaction(SIGINT, &act, NULL);
>> do {
>> uint64_t submit, wait, elapsed;
>> struct timespec tv = {};
>> + igt_spin_t *spin;
>> igt_nsec_elapsed(&tv);
>> -
>> - gem_execbuf(i915, &execbuf);
>> + spin = igt_spin_new(i915,
>> + .ahnd = ahnd,
>> + .ctx = ctx,
>> + .engine = engine_flags);
>> submit = igt_nsec_elapsed(&tv);
>> - if (flags & WAITBOOST) {
>> - gem_sync(i915, obj.handle);
>> + if (flags & FREQUENT_BOOST) {
>> + /* Set MIN freq to RP0 to achieve the peak freq */
>> + igt_assert_lt(0, set_freq(dirfd, RPS_MIN_FREQ_MHZ, rp0));
>> + igt_assert(gem_bo_busy(i915, spin->handle));
>> + gem_wait(i915, spin->handle, &timeout);
>> +
>> + /* Restore the MIN freq back to default */
>> + igt_assert_lt(0, set_freq(dirfd, RPS_MIN_FREQ_MHZ,
>> stash_min[gt]));
>> + igt_spin_end(spin);
>> + igt_spin_free(i915, spin);
>> + gem_quiescent_gpu(i915);
>> if (flags & ONCE)
>> - flags &= ~WAITBOOST;
>> + flags &= ~FREQUENT_BOOST;
>> } else {
>> - while (gem_bo_busy(i915, obj.handle))
>> - usleep(0);
>> + igt_assert(gem_bo_busy(i915, spin->handle));
>> + igt_spin_end(spin);
>> + igt_spin_free(i915, spin);
>> + gem_quiescent_gpu(i915);
>> }
>> wait = igt_nsec_elapsed(&tv);
>> @@ -350,6 +379,7 @@ static void bg_load(int i915, uint32_t ctx_id,
>> uint64_t engine_flags, unsigned i
>> /* aim for ~1% busy */
>> usleep(min_t(elapsed, elapsed / 10, 50 * 1000));
>> } while (!READ_ONCE(*ctl));
>> + put_ahnd(ahnd);
>> }
>> static void kill_children(int sig)
>> @@ -361,25 +391,29 @@ static void kill_children(int sig)
>> signal(sig, old);
>> }
>> -static void rc6_idle(int i915, uint32_t ctx_id, uint64_t flags,
>> unsigned int gt)
>> +static void rc6_idle(int i915, const intel_ctx_t *ctx, uint64_t
>> flags, unsigned int gt)
>> {
>> - const int64_t duration_ns = SLEEP_DURATION * (int64_t)NSEC_PER_SEC;
>> + const int64_t duration_ns = 2 * SLEEP_DURATION *
>> (int64_t)NSEC_PER_SEC;
>> const int tolerance = 20; /* Some RC6 is better than none! */
>> const unsigned int gen = intel_gen(intel_get_drm_devid(i915));
>> + int num_gts = igt_sysfs_get_num_gt(i915);
>> struct {
>> const char *name;
>> unsigned int flags;
>> double power;
>> } phases[] = {
>> + { "once", FREQUENT_BOOST | ONCE },
>> { "normal", 0 },
>> - { "boost", WAITBOOST },
>> - { "once", WAITBOOST | ONCE },
>> + { "boost", FREQUENT_BOOST }
>> };
>> + int dirfd = igt_sysfs_gt_open(i915, gt);
>> struct power_sample sample[2];
>> unsigned long slept, cycles;
>> unsigned long *done;
>> uint64_t rc6, ts[2];
>> struct igt_power gpu;
>> + int sysfs_dirfd;
>> + int gt_num;
>> int fd;
>> fd = open_pmu(i915, __I915_PMU_RC6_RESIDENCY(gt));
>> @@ -409,10 +443,16 @@ static void rc6_idle(int i915, uint32_t ctx_id,
>> uint64_t flags, unsigned int gt)
>> done = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
>> + stash_min = (uint32_t *)malloc(sizeof(uint32_t) * num_gts);
>> + for_each_sysfs_gt_dirfd(i915, sysfs_dirfd, gt_num) {
>> + stash_min[gt_num] = get_freq(sysfs_dirfd, RPS_MIN_FREQ_MHZ);
>> + igt_pm_ignore_slpc_efficient_freq(i915, sysfs_dirfd, true);
>> + }
>> +
>> for (int p = 0; p < ARRAY_SIZE(phases); p++) {
>> memset(done, 0, 2 * sizeof(*done));
>> igt_fork(child, 1) /* Setup up a very light load */
>> - bg_load(i915, ctx_id, flags, phases[p].flags, done);
>> + bg_load(i915, ctx, dirfd, flags, phases[p].flags, done,
>> gt);
>> igt_power_get_energy(&gpu, &sample[0]);
>> cycles = -READ_ONCE(done[1]);
>> @@ -451,15 +491,17 @@ static void rc6_idle(int i915, uint32_t ctx_id,
>> uint64_t flags, unsigned int gt)
>> munmap(done, 4096);
>> close(fd);
>> + close(dirfd);
>> + close(sysfs_dirfd);
>> igt_power_close(&gpu);
>> - if (phases[1].power - phases[0].power > 10) {
>> - igt_assert_f(2 * phases[2].power - phases[0].power <=
>> phases[1].power,
>> + if (phases[2].power - phases[1].power > 20 && !gt &&
>> !gem_has_lmem(i915)) {
> Why 20%? and why gt check?
So as the time taken for each phase has been increased, I have updated
the threshold also. In a multi-GT system, running a workload on GT1 can
inadvertently cause GT0 to wake up. Since the tests calculate total GPU
power, this unintended wake-up of GT0 can skew the power measurements
for a single GT. Therefore, I have disabled GT1 for this evaluation.
>> + igt_assert_f(2 * phases[0].power - phases[1].power <=
>> phases[2].power,
>> "Exceeded energy expectations for single busy wait
>> load\n"
>> "Used %.1fmW, min %.1fmW, max %.1fmW, expected
>> less than %.1fmW\n",
>> - phases[2].power, phases[0].power, phases[1].power,
>> - phases[0].power + (phases[1].power -
>> phases[0].power) / 2);
>> + phases[0].power, phases[1].power, phases[2].power,
>> + phases[1].power + (phases[2].power -
>> phases[1].power) / 2);
>> }
>> }
>> @@ -577,6 +619,8 @@ igt_main
>> /* Use drm_open_driver to verify device existence */
>> igt_fixture {
>> i915 = drm_open_driver(DRIVER_INTEL);
>> + intel_allocator_multiprocess_start();
>> + igt_install_exit_handler(restore_freq);
>> }
>> igt_subtest_with_dynamic("rc6-idle") {
>> @@ -590,13 +634,18 @@ igt_main
>> for_each_ctx_engine(i915, ctx, e) {
>> if (e->instance == 0) {
>> igt_dynamic_f("gt%u-%s", gt, e->name)
>> - rc6_idle(i915, ctx->id, e->flags, gt);
>> + rc6_idle(i915, ctx, e->flags, gt);
>> }
>> }
>> intel_ctx_destroy(i915, ctx);
>> }
>> }
>> + igt_fixture
>> + {
>> + intel_allocator_multiprocess_stop();
>> + }
> unnecessary {}
I will fix this.
>
> Thanks
> Riana > +
>> igt_subtest_with_dynamic("rc6-fence") {
>> igt_require_gem(i915);
>> gem_quiescent_gpu(i915);
>
Thanks,
Anirban
next prev parent reply other threads:[~2025-04-08 15:32 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-03-06 18:59 [i-g-t,v3,0/2] tests/i915/pm_rc6_residency: Replace waitboost with Sk Anirban
2025-03-06 18:59 ` [i-g-t, v3, 1/2] tests/i915/pm_rc6_residency: Use sysfs to achieve peak frequency Sk Anirban
2025-03-20 5:29 ` Riana Tauro
2025-04-08 15:32 ` Anirban, Sk [this message]
2025-03-06 18:59 ` [i-g-t,v3,2/2] HAX: Add rc6-idle tests to fast feedback list Sk Anirban
2025-03-07 3:09 ` ✓ Xe.CI.BAT: success for tests/i915/pm_rc6_residency: Replace waitboost with (rev4) Patchwork
2025-03-07 3:27 ` ✗ i915.CI.BAT: failure " Patchwork
2025-03-07 14:05 ` ✗ Xe.CI.Full: " Patchwork
2025-03-07 22:31 ` ✓ Xe.CI.BAT: success for tests/i915/pm_rc6_residency: Replace waitboost with (rev5) Patchwork
2025-03-07 22:53 ` ✗ i915.CI.BAT: failure " Patchwork
2025-03-09 5:28 ` ✗ Xe.CI.Full: " Patchwork
2025-03-11 14:21 ` ✓ i915.CI.BAT: success " Patchwork
2025-03-12 9:30 ` ✓ i915.CI.Full: " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ae8cd81e-e3a8-48c7-945b-590825090de7@intel.com \
--to=sk.anirban@intel.com \
--cc=igt-dev@lists.freedesktop.org \
--cc=riana.tauro@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox