From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mgamail.intel.com (mgamail.intel.com [134.134.136.100]) by gabe.freedesktop.org (Postfix) with ESMTPS id 6A6B710E718 for ; Fri, 22 Sep 2023 21:52:40 +0000 (UTC) From: Umesh Nerlige Ramappa To: igt-dev@lists.freedesktop.org Date: Fri, 22 Sep 2023 14:52:31 -0700 Message-Id: <20230922215233.2438200-4-umesh.nerlige.ramappa@intel.com> In-Reply-To: <20230922215233.2438200-1-umesh.nerlige.ramappa@intel.com> References: <20230922215233.2438200-1-umesh.nerlige.ramappa@intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [igt-dev] [PATCH i-g-t 3/5] i915/pmu: Switch to new busyness counter if old one is unavailable List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: igt-dev-bounces@lists.freedesktop.org Sender: "igt-dev" List-ID: Starting with MTL, the old busyness counter is deprecated and users must use the busyness ticks counter instead. Add support in the IGT tests to switch to the new counter as needed. Signed-off-by: Umesh Nerlige Ramappa --- lib/i915/i915_drm_local.h | 10 ++++ tests/intel/perf_pmu.c | 115 +++++++++++++++++++++++++------------- 2 files changed, 86 insertions(+), 39 deletions(-) diff --git a/lib/i915/i915_drm_local.h b/lib/i915/i915_drm_local.h index 0f47578c6..b94b88de3 100644 --- a/lib/i915/i915_drm_local.h +++ b/lib/i915/i915_drm_local.h @@ -26,6 +26,13 @@ extern "C" { #define DRM_I915_PERF_PROP_OA_ENGINE_CLASS 9 #define DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE 10 +#define I915_SAMPLE_BUSY_TICKS (I915_SAMPLE_SEMA + 1) + +#define I915_PMU_ENGINE_BUSY_TICKS(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY_TICKS) + +#define I915_PMU_TOTAL_ACTIVE_TICKS __I915_PMU_OTHER(5) + /* * Top 4 bits of every non-engine counter are GT id. 
*/ @@ -40,6 +47,9 @@ extern "C" { #define __I915_PMU_INTERRUPTS(gt) ___I915_PMU_OTHER(gt, 2) #define __I915_PMU_RC6_RESIDENCY(gt) ___I915_PMU_OTHER(gt, 3) #define __I915_PMU_SOFTWARE_GT_AWAKE_TIME(gt) ___I915_PMU_OTHER(gt, 4) +#define __I915_PMU_TOTAL_ACTIVE_TICKS(gt) ___I915_PMU_OTHER(gt, 5) + +#define I915_SCHEDULER_CAP_ENGINE_BUSY_TICKS_STATS (1ul << 5) #define I915_GEM_CREATE_EXT_SET_PAT 2 diff --git a/tests/intel/perf_pmu.c b/tests/intel/perf_pmu.c index eafa3d988..5999d1e22 100644 --- a/tests/intel/perf_pmu.c +++ b/tests/intel/perf_pmu.c @@ -334,6 +334,35 @@ static char *get_drpc(int i915, int gt_id) return igt_sysfs_get(gt_dir, "drpc"); } +static uint64_t __to_ns(uint64_t val) +{ + return busy_ticks_only ? + (val * NSEC_PER_SEC) / cs_ts_freq : + val; +} + +#define __batch_duration_ns \ +({ \ + unsigned long __delay_ns = busy_ticks_only ? \ + 2000e6 : \ + batch_duration_ns; \ + __delay_ns; \ +}) + +#define __I915_PMU_ENGINE_BUSY(c, i) \ +({ \ + uint64_t __config; \ + typeof(c) __c = c; \ + typeof(i) __i = i; \ + \ + if (busy_ticks_only) \ + __config = I915_PMU_ENGINE_BUSY_TICKS(__c, __i); \ + else \ + __config = I915_PMU_ENGINE_BUSY(__c, __i); \ + \ + __config; \ +}) + static int open_pmu(int i915, uint64_t config) { int fd; @@ -506,10 +535,11 @@ single(int gem_fd, const intel_ctx_t *ctx, spin = NULL; val = pmu_read_single(fd); - slept = measured_usleep(batch_duration_ns / 1000); + slept = measured_usleep(__batch_duration_ns / 1000); if (flags & TEST_TRAILING_IDLE) end_spin(gem_fd, spin, flags); val = pmu_read_single(fd) - val; + val = __to_ns(val); if (flags & FLAG_HANG) igt_force_gpu_reset(gem_fd); @@ -555,11 +585,12 @@ busy_start(int gem_fd, const intel_ctx_t *ctx, spin = __igt_sync_spin(gem_fd, ahnd, ctx, e); - fd = open_pmu(gem_fd, I915_PMU_ENGINE_BUSY(e->class, e->instance)); + fd = open_pmu(gem_fd, __I915_PMU_ENGINE_BUSY(e->class, e->instance)); val = __pmu_read_single(fd, &ts[0]); - slept = measured_usleep(batch_duration_ns / 1000); + slept = 
measured_usleep(__batch_duration_ns / 1000); val = __pmu_read_single(fd, &ts[1]) - val; + val = __to_ns(val); igt_debug("slept=%lu perf=%"PRIu64"\n", slept, ts[1] - ts[0]); igt_spin_free(gem_fd, spin); @@ -611,11 +642,12 @@ busy_double_start(int gem_fd, const intel_ctx_t *ctx, * Open PMU as fast as possible after the second spin batch in attempt * to be faster than the driver handling lite-restore. */ - fd = open_pmu(gem_fd, I915_PMU_ENGINE_BUSY(e->class, e->instance)); + fd = open_pmu(gem_fd, __I915_PMU_ENGINE_BUSY(e->class, e->instance)); val = __pmu_read_single(fd, &ts[0]); - slept = measured_usleep(batch_duration_ns / 1000); + slept = measured_usleep(__batch_duration_ns / 1000); val = __pmu_read_single(fd, &ts[1]) - val; + val = __to_ns(val); igt_debug("slept=%lu perf=%"PRIu64"\n", slept, ts[1] - ts[0]); igt_spin_end(spin[0]); @@ -685,8 +717,8 @@ busy_check_all(int gem_fd, const intel_ctx_t *ctx, busy_idx = i; fd[i++] = open_group(gem_fd, - I915_PMU_ENGINE_BUSY(e_->class, - e_->instance), + __I915_PMU_ENGINE_BUSY(e_->class, + e_->instance), fd[0]); } @@ -694,7 +726,7 @@ busy_check_all(int gem_fd, const intel_ctx_t *ctx, spin = igt_sync_spin(gem_fd, ahnd, ctx, e); pmu_read_multi(fd[0], num_engines, tval[0]); - slept = measured_usleep(batch_duration_ns / 1000); + slept = measured_usleep(__batch_duration_ns / 1000); if (flags & TEST_TRAILING_IDLE) end_spin(gem_fd, spin, flags); pmu_read_multi(fd[0], num_engines, tval[1]); @@ -706,7 +738,7 @@ busy_check_all(int gem_fd, const intel_ctx_t *ctx, put_ahnd(ahnd); for (i = 0; i < num_engines; i++) - val[i] = tval[1][i] - tval[0][i]; + val[i] = __to_ns(tval[1][i] - tval[0][i]); log_busy(num_engines, val); @@ -756,7 +788,7 @@ most_busy_check_all(int gem_fd, const intel_ctx_t *ctx, else spin = __igt_sync_spin_poll(gem_fd, ahnd, ctx, e_); - val[i++] = I915_PMU_ENGINE_BUSY(e_->class, e_->instance); + val[i++] = __I915_PMU_ENGINE_BUSY(e_->class, e_->instance); } igt_assert(i == num_engines); igt_require(spin); /* at least one 
busy engine */ @@ -769,7 +801,7 @@ most_busy_check_all(int gem_fd, const intel_ctx_t *ctx, usleep(__igt_sync_spin_wait(gem_fd, spin) * num_engines / 1e3); pmu_read_multi(fd[0], num_engines, tval[0]); - slept = measured_usleep(batch_duration_ns / 1000); + slept = measured_usleep(__batch_duration_ns / 1000); if (flags & TEST_TRAILING_IDLE) end_spin(gem_fd, spin, flags); pmu_read_multi(fd[0], num_engines, tval[1]); @@ -781,7 +813,7 @@ most_busy_check_all(int gem_fd, const intel_ctx_t *ctx, put_ahnd(ahnd); for (i = 0; i < num_engines; i++) - val[i] = tval[1][i] - tval[0][i]; + val[i] = __to_ns(tval[1][i] - tval[0][i]); log_busy(num_engines, val); @@ -815,7 +847,7 @@ all_busy_check_all(int gem_fd, const intel_ctx_t *ctx, else spin = __igt_sync_spin_poll(gem_fd, ahnd, ctx, e); - val[i++] = I915_PMU_ENGINE_BUSY(e->class, e->instance); + val[i++] = __I915_PMU_ENGINE_BUSY(e->class, e->instance); } igt_assert(i == num_engines); @@ -827,7 +859,7 @@ all_busy_check_all(int gem_fd, const intel_ctx_t *ctx, usleep(__igt_sync_spin_wait(gem_fd, spin) * num_engines / 1e3); pmu_read_multi(fd[0], num_engines, tval[0]); - slept = measured_usleep(batch_duration_ns / 1000); + slept = measured_usleep(__batch_duration_ns / 1000); if (flags & TEST_TRAILING_IDLE) end_spin(gem_fd, spin, flags); pmu_read_multi(fd[0], num_engines, tval[1]); @@ -839,7 +871,7 @@ all_busy_check_all(int gem_fd, const intel_ctx_t *ctx, put_ahnd(ahnd); for (i = 0; i < num_engines; i++) - val[i] = tval[1][i] - tval[0][i]; + val[i] = __to_ns(tval[1][i] - tval[0][i]); log_busy(num_engines, val); @@ -870,7 +902,7 @@ no_sema(int gem_fd, const intel_ctx_t *ctx, spin = NULL; pmu_read_multi(fd[0], 2, val[0]); - measured_usleep(batch_duration_ns / 1000); + measured_usleep(__batch_duration_ns / 1000); if (flags & TEST_TRAILING_IDLE) end_spin(gem_fd, spin, flags); pmu_read_multi(fd[0], 2, val[1]); @@ -983,7 +1015,7 @@ sema_wait(int gem_fd, const intel_ctx_t *ctx, "sampling failed to start withing 10ms\n"); val[0] = 
__pmu_read_single(fd, &ts[0]); - slept = measured_usleep(batch_duration_ns / 1000); + slept = measured_usleep(__batch_duration_ns / 1000); if (flags & TEST_TRAILING_IDLE) obj_ptr[0] = 1; val[1] = __pmu_read_single(fd, &ts[1]); @@ -1104,11 +1136,11 @@ __sema_busy(int gem_fd, uint64_t ahnd, int pmu, const intel_ctx_t *ctx, total = pmu_read_multi(pmu, 2, start); - sema = measured_usleep(batch_duration_ns * sema_pct / 100 / 1000); + sema = measured_usleep(__batch_duration_ns * sema_pct / 100 / 1000); *map = 2; __sync_synchronize(); - busy = measured_usleep(batch_duration_ns * (busy_pct - sema_pct) / 100 / 1000); + busy = measured_usleep(__batch_duration_ns * (busy_pct - sema_pct) / 100 / 1000); igt_spin_end(spin); - measured_usleep(batch_duration_ns * (100 - busy_pct) / 100 / 1000); + measured_usleep(__batch_duration_ns * (100 - busy_pct) / 100 / 1000); total = pmu_read_multi(pmu, 2, val) - total; igt_spin_free(gem_fd, spin); @@ -1116,7 +1148,7 @@ __sema_busy(int gem_fd, uint64_t ahnd, int pmu, const intel_ctx_t *ctx, busy += sema; val[SEMA] -= start[SEMA]; - val[BUSY] -= start[BUSY]; + val[BUSY] -= __to_ns(start[BUSY]); igt_info("%s, target: {%.1f%% [%d], %.1f%% [%d]}, measured: {%.1f%%, %.1f%%}\n", e->name, @@ -1145,7 +1177,7 @@ sema_busy(int gem_fd, const intel_ctx_t *ctx, fd[0] = open_group(gem_fd, I915_PMU_ENGINE_SEMA(e->class, e->instance), -1); - fd[1] = open_group(gem_fd, I915_PMU_ENGINE_BUSY(e->class, e->instance), + fd[1] = open_group(gem_fd, __I915_PMU_ENGINE_BUSY(e->class, e->instance), fd[0]); __sema_busy(gem_fd, ahnd, fd[0], ctx, e, 50, 100); @@ -1173,8 +1205,9 @@ static void test_awake(int i915, const intel_ctx_t *ctx) igt_spin_new(i915, .ahnd = ahnd, .ctx = ctx, .engine = e->flags); val = pmu_read_single(fd); - slept = measured_usleep(batch_duration_ns / 1000); + slept = measured_usleep(__batch_duration_ns / 1000); val = pmu_read_single(fd) - val; + val = __to_ns(val); gem_quiescent_gpu(i915); assert_within_epsilon(val, slept, tolerance); @@ -1185,7 
+1218,7 @@ static void test_awake(int i915, const intel_ctx_t *ctx) igt_spin_new(i915, .ahnd = ahnd, .ctx = ctx, .engine = e->flags); val = pmu_read_single(fd); - slept = measured_usleep(batch_duration_ns / 1000); + slept = measured_usleep(__batch_duration_ns / 1000); val = pmu_read_single(fd) - val; gem_quiescent_gpu(i915); @@ -1405,7 +1438,7 @@ static void multi_client(int gem_fd, const intel_ctx_t *ctx, const struct intel_execution_engine2 *e) { - uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance); + uint64_t config = __I915_PMU_ENGINE_BUSY(e->class, e->instance); unsigned long slept[2]; uint64_t val[2], ts[2], perf_slept[2]; igt_spin_t *spin; @@ -1426,14 +1459,16 @@ multi_client(int gem_fd, const intel_ctx_t *ctx, spin = igt_sync_spin(gem_fd, ahnd, ctx, e); val[0] = val[1] = __pmu_read_single(fd[0], &ts[0]); - slept[1] = measured_usleep(batch_duration_ns / 1000); + slept[1] = measured_usleep(__batch_duration_ns / 1000); val[1] = __pmu_read_single(fd[1], &ts[1]) - val[1]; + val[1] = __to_ns(val[1]); perf_slept[1] = ts[1] - ts[0]; igt_debug("slept=%lu perf=%"PRIu64"\n", slept[1], perf_slept[1]); close(fd[1]); - slept[0] = measured_usleep(batch_duration_ns / 1000) + slept[1]; + slept[0] = measured_usleep(__batch_duration_ns / 1000) + slept[1]; val[0] = __pmu_read_single(fd[0], &ts[1]) - val[0]; + val[0] = __to_ns(val[0]); perf_slept[0] = ts[1] - ts[0]; igt_debug("slept=%lu perf=%"PRIu64"\n", slept[0], perf_slept[0]); @@ -1463,7 +1498,7 @@ static void invalid_init(int i915) #define ATTR_INIT() \ do { \ memset(&attr, 0, sizeof (attr)); \ - attr.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0); \ + attr.config = __I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0); \ attr.type = i915_perf_type_id(i915); \ igt_assert(attr.type != 0); \ errno = 0; \ @@ -1510,7 +1545,7 @@ static void cpu_hotplug(int gem_fd) igt_require(cpu0_hotplug_support()); fd = open_pmu(gem_fd, - I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0)); + 
__I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0)); /* * Create two spinners so test can ensure shorter gaps in engine @@ -1608,6 +1643,7 @@ static void cpu_hotplug(int gem_fd) } val = __pmu_read_single(fd, &ts[1]) - val; + val = __to_ns(val); end_spin(gem_fd, spin[0], FLAG_SYNC); end_spin(gem_fd, spin[1], FLAG_SYNC); @@ -1839,7 +1875,7 @@ test_frequency(int gem_fd, unsigned int gt) spin = spin_sync_gt(gem_fd, ahnd, gt, &ctx); slept = pmu_read_multi(fd[0], 2, start); - measured_usleep(batch_duration_ns / 1000); + measured_usleep(__batch_duration_ns / 1000); slept = pmu_read_multi(fd[0], 2, val) - slept; min[0] = 1e9*(val[0] - start[0]) / slept; @@ -1869,7 +1905,7 @@ test_frequency(int gem_fd, unsigned int gt) spin = spin_sync_gt(gem_fd, ahnd, gt, &ctx); slept = pmu_read_multi(fd[0], 2, start); - measured_usleep(batch_duration_ns / 1000); + measured_usleep(__batch_duration_ns / 1000); slept = pmu_read_multi(fd[0], 2, val) - slept; max[0] = 1e9*(val[0] - start[0]) / slept; @@ -1927,7 +1963,7 @@ test_frequency_idle(int gem_fd, unsigned int gt) measured_usleep(2000); /* Wait for timers to cease */ slept = pmu_read_multi(fd[0], 2, start); - measured_usleep(batch_duration_ns / 1000); + measured_usleep(__batch_duration_ns / 1000); slept = pmu_read_multi(fd[0], 2, val) - slept; close(fd[0]); @@ -2150,7 +2186,7 @@ static void test_enable_race(int gem_fd, const intel_ctx_t *ctx, const struct intel_execution_engine2 *e) { - uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance); + uint64_t config = __I915_PMU_ENGINE_BUSY(e->class, e->instance); struct igt_helper_process engine_load = { }; const uint32_t bbend = MI_BATCH_BUFFER_END; struct drm_i915_gem_exec_object2 obj = { }; @@ -2223,7 +2259,7 @@ accuracy(int gem_fd, const intel_ctx_t *ctx, unsigned long test_us; unsigned long cycle_us, busy_us, idle_us; double busy_r, expected; - uint64_t val[2]; + uint64_t val[2], _val; uint64_t ts[2]; int link[2]; int fd; @@ -2342,7 +2378,7 @@ accuracy(int gem_fd, const 
intel_ctx_t *ctx, put_ahnd(ahnd); } - fd = open_pmu(gem_fd, I915_PMU_ENGINE_BUSY(e->class, e->instance)); + fd = open_pmu(gem_fd, __I915_PMU_ENGINE_BUSY(e->class, e->instance)); /* Let the child run. */ read(link[0], &expected, sizeof(expected)); @@ -2359,7 +2395,8 @@ accuracy(int gem_fd, const intel_ctx_t *ctx, igt_waitchildren(); - busy_r = (double)(val[1] - val[0]) / (ts[1] - ts[0]); + _val = __to_ns(val[1] - val[0]); + busy_r = (double)_val / (ts[1] - ts[0]); igt_info("error=%.2f%% (%.2f%% vs %.2f%%)\n", (busy_r - expected) * 100, 100 * busy_r, 100 * expected); @@ -2392,7 +2429,7 @@ static void faulting_read(int gem_fd, const struct mmap_offset *t) ptr = create_mmap(gem_fd, t, 4096); igt_require(ptr != NULL); - fd = open_pmu(gem_fd, I915_PMU_ENGINE_BUSY(0, 0)); + fd = open_pmu(gem_fd, __I915_PMU_ENGINE_BUSY(0, 0)); igt_require(fd != -1); igt_assert_eq(read(fd, ptr, 4096), 2 * sizeof(uint64_t)); close(fd); @@ -2433,7 +2470,7 @@ static void test_unload(unsigned int num_engines) cfg = intel_ctx_cfg_all_physical(i915); for_each_ctx_cfg_engine(i915, &cfg, e) { fd[count] = perf_i915_open_group(i915, - I915_PMU_ENGINE_BUSY(e->class, e->instance), + __I915_PMU_ENGINE_BUSY(e->class, e->instance), fd[count - 1]); if (fd[count] != -1) count++; @@ -2580,7 +2617,7 @@ igt_main * is correctly rejected. */ test_each_engine("init-busy", fd, ctx, e) - init(fd, ctx, e, I915_PMU_ENGINE_BUSY(e->class, e->instance)); + init(fd, ctx, e, __I915_PMU_ENGINE_BUSY(e->class, e->instance)); test_each_engine("init-wait", fd, ctx, e) init(fd, ctx, e, I915_PMU_ENGINE_WAIT(e->class, e->instance)); -- 2.38.1