From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga03.intel.com (mga03.intel.com [134.134.136.65]) by gabe.freedesktop.org (Postfix) with ESMTPS id 43E1D10E2ED for ; Wed, 22 Mar 2023 00:05:30 +0000 (UTC) From: Umesh Nerlige Ramappa To: igt-dev@lists.freedesktop.org, Ashutosh Dixit , kamil.konieczny@linux.intel.com Date: Tue, 21 Mar 2023 17:05:07 -0700 Message-Id: <20230322000523.3310187-16-umesh.nerlige.ramappa@intel.com> In-Reply-To: <20230322000523.3310187-1-umesh.nerlige.ramappa@intel.com> References: <20230322000523.3310187-1-umesh.nerlige.ramappa@intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [igt-dev] [PATCH i-g-t v4 15/31] i915/perf: Add OAM support List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Lionel G Landwerlin Errors-To: igt-dev-bounces@lists.freedesktop.org Sender: "igt-dev" List-ID: Add OAM formats and support for media engines in perf tests Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit --- include/drm-uapi/i915_drm.h | 4 ++ lib/intel_chipset.h | 3 + lib/intel_device_info.c | 1 + tests/i915/perf.c | 138 +++++++++++++++++++++++++----------- 4 files changed, 105 insertions(+), 41 deletions(-) diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h index b71acfba..a0876ee4 100644 --- a/include/drm-uapi/i915_drm.h +++ b/include/drm-uapi/i915_drm.h @@ -2545,6 +2545,10 @@ enum drm_i915_oa_format { I915_OAR_FORMAT_A32u40_A4u32_B8_C8, I915_OA_FORMAT_A24u40_A14u32_B8_C8, + /* MTL OAM */ + I915_OAM_FORMAT_MPEC8u64_B8_C8, + I915_OAM_FORMAT_MPEC8u32_B8_C8, + I915_OA_FORMAT_MAX /* non-ABI */ }; diff --git a/lib/intel_chipset.h b/lib/intel_chipset.h index c9762ae6..c2c8998d 100644 --- a/lib/intel_chipset.h +++ b/lib/intel_chipset.h @@ -45,6 +45,7 @@ struct intel_device_info { unsigned gt; /* 0 if unknown */ bool has_4tile : 1; bool has_flatccs : 1; + bool has_oam : 1; bool is_mobile : 1; bool is_whitney : 1; bool is_almador : 1; @@ -231,4 +232,6 @@ void intel_check_pch(void); #define HAS_FLATCCS(devid) (intel_get_device_info(devid)->has_flatccs) +#define HAS_OAM(devid) (intel_get_device_info(devid)->has_oam) + #endif /* _INTEL_CHIPSET_H */ diff --git a/lib/intel_device_info.c b/lib/intel_device_info.c index 12b81d48..0b11dfce 100644 --- a/lib/intel_device_info.c +++ b/lib/intel_device_info.c @@ -472,6 +472,7 @@ static const struct intel_device_info intel_meteorlake_info = { .graphics_rel = 70, .display_ver = 14, .has_4tile = true, + .has_oam = true, .is_meteorlake = true, .codename = "meteorlake", .cmds_info = &gen12_mtl_cmds_info, diff --git a/tests/i915/perf.c b/tests/i915/perf.c index d3f80a19..d6db31c0 100644 --- a/tests/i915/perf.c +++ b/tests/i915/perf.c @@ -199,6 +199,40 @@ static struct oa_format dg2_oa_formats[I915_OA_FORMAT_MAX] = { .c_off = 224, .n_c = 8, .oa_type = OAG, }, }; +static struct oa_format mtl_oa_formats[I915_OA_FORMAT_MAX] = { + [I915_OAR_FORMAT_A32u40_A4u32_B8_C8] = { + "A32u40_A4u32_B8_C8", .size = 256, + .a40_high_off = 160, .a40_low_off = 16, .n_a40 = 32, + .a_off = 144, .n_a = 4, .first_a = 32, + .b_off = 192, .n_b = 8, + .c_off = 224, .n_c = 8, .oa_type = OAR, }, + /* This format has A36 and A37 interleaved with high bytes of some A + * counters, so we will accumulate only subset of counters. + */ + [I915_OA_FORMAT_A24u40_A14u32_B8_C8] = { + "A24u40_A14u32_B8_C8", .size = 256, + /* u40: A4 - A23 */ + .a40_high_off = 160, .a40_low_off = 16, .n_a40 = 20, .first_a40 = 4, + /* u32: A0 - A3 */ + .a_off = 16, .n_a = 4, + .b_off = 192, .n_b = 8, + .c_off = 224, .n_c = 8, .oa_type = OAG, }, + + /* Treat MPEC countes as A counters for now */ + [I915_OAM_FORMAT_MPEC8u64_B8_C8] = { + "MPEC8u64_B8_C8", .size = 192, + .a64_off = 32, .n_a64 = 8, + .b_off = 96, .n_b = 8, + .c_off = 128, .n_c = 8, .oa_type = OAM, + .report_hdr_64bit = true, }, + [I915_OAM_FORMAT_MPEC8u32_B8_C8] = { + "MPEC8u32_B8_C8", .size = 128, + .a_off = 32, .n_a = 8, + .b_off = 64, .n_b = 8, + .c_off = 96, .n_c = 8, .oa_type = OAM, + .report_hdr_64bit = true, }, +}; + static bool hsw_undefined_a_counters[45] = { [4] = true, [6] = true, @@ -260,8 +294,10 @@ get_oa_format(enum drm_i915_oa_format format) { if (IS_HASWELL(devid)) return hsw_oa_formats[format]; - else if (IS_DG2(devid) || IS_METEORLAKE(devid)) + else if (IS_DG2(devid)) return dg2_oa_formats[format]; + else if (IS_METEORLAKE(devid)) + return mtl_oa_formats[format]; else if (IS_GEN12(devid)) return gen12_oa_formats[format]; else @@ -343,21 +379,6 @@ static int i915_perf_revision(int fd) return value; } -/* - * perf_supports_engine is used in the for loop that iterates over engines and - * determines if perf test can be run on a particular engine. For perf revisions - * below 10, we only need to run the test once, so we return true only for rcs0. - * Note that the test itself ignores the class instance parameters if they are - * not supported by the perf interface. This enables us to use a single for-loop - * construct to run the same test on all platforms and all perf revisions. - */ -static bool -perf_supports_engine(const struct intel_execution_engine2 *e) -{ - return e->class == I915_ENGINE_CLASS_RENDER && - e->instance == 0; -} - static bool has_param_class_instance(void) { @@ -661,8 +682,12 @@ oar_unit_default_format(void) } static int -oa_unit_default_format(void) +oa_unit_default_format(const struct intel_execution_engine2 *e) { + if (e->class == I915_ENGINE_CLASS_VIDEO || + e->class == I915_ENGINE_CLASS_VIDEO_ENHANCE) + return I915_OAM_FORMAT_MPEC8u32_B8_C8; + return test_set->perf_oa_format; } @@ -1739,6 +1764,20 @@ print_report(uint32_t *report, int fmt) } #endif +static bool +oa_unit_supports_engine(int oa_unit, const struct intel_execution_engine2 *e) +{ + switch (oa_unit) { + case OAM: + return e->class == I915_ENGINE_CLASS_VIDEO || + e->class == I915_ENGINE_CLASS_VIDEO_ENHANCE; + case OAG: + return e->class == I915_ENGINE_CLASS_RENDER; + } + + return false; +} + static void test_oa_formats(const struct intel_execution_engine2 *e) { @@ -1750,7 +1789,7 @@ test_oa_formats(const struct intel_execution_engine2 *e) if (!format.name) /* sparse, indexed by ID */ continue; - if (format.oa_type != OAG) /* sparse, indexed by ID */ + if (!oa_unit_supports_engine(format.oa_type, e)) continue; igt_debug("Checking OA format %s\n", format.name); @@ -1909,7 +1948,7 @@ static bool expected_report_timing_delta(uint32_t delta, uint32_t expected_delta static void test_oa_exponents(const struct intel_execution_engine2 *e) { - uint64_t fmt = oa_unit_default_format(); + uint64_t fmt = oa_unit_default_format(e); load_helper_init(); load_helper_run(HIGH); @@ -2253,7 +2292,7 @@ test_blocking(uint64_t requested_oa_period, ADD_PROPS(props, idx, SAMPLE_OA, true); ADD_PROPS(props, idx, OA_METRICS_SET, test_set->perf_oa_metrics_set); - ADD_PROPS(props, idx, OA_FORMAT, oa_unit_default_format()); + ADD_PROPS(props, idx, OA_FORMAT, oa_unit_default_format(e)); ADD_PROPS(props, idx, OA_EXPONENT, oa_exponent); if (has_param_poll_period() && set_kernel_hrtimer) @@ -2416,7 +2455,7 @@ test_polling(uint64_t requested_oa_period, ADD_PROPS(props, idx, SAMPLE_OA, true); ADD_PROPS(props, idx, OA_METRICS_SET, test_set->perf_oa_metrics_set); - ADD_PROPS(props, idx, OA_FORMAT, oa_unit_default_format()); + ADD_PROPS(props, idx, OA_FORMAT, oa_unit_default_format(e)); ADD_PROPS(props, idx, OA_EXPONENT, oa_exponent); if (has_param_poll_period() && set_kernel_hrtimer) @@ -2690,7 +2729,7 @@ gen12_test_oa_tlb_invalidate(const struct intel_execution_engine2 *e) DRM_I915_PERF_PROP_SAMPLE_OA, true, DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set, - DRM_I915_PERF_PROP_OA_FORMAT, oa_unit_default_format(), + DRM_I915_PERF_PROP_OA_FORMAT, oa_unit_default_format(e), DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent, DRM_I915_PERF_PROP_OA_ENGINE_CLASS, e->class, DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE, e->instance, @@ -2733,7 +2772,7 @@ test_buffer_fill(const struct intel_execution_engine2 *e) /* ~5 micro second period */ int oa_exponent = max_oa_exponent_for_period_lte(5000); uint64_t oa_period = oa_exponent_to_ns(oa_exponent); - uint64_t fmt = oa_unit_default_format(); + uint64_t fmt = oa_unit_default_format(e); uint64_t properties[] = { /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, @@ -2969,7 +3008,7 @@ test_enable_disable(const struct intel_execution_engine2 *e) /* ~5 micro second period */ int oa_exponent = max_oa_exponent_for_period_lte(5000); uint64_t oa_period = oa_exponent_to_ns(oa_exponent); - uint64_t fmt = oa_unit_default_format(); + uint64_t fmt = oa_unit_default_format(e); uint64_t properties[] = { /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, @@ -4564,7 +4603,7 @@ test_stress_open_close(const struct intel_execution_engine2 *e) /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set, - DRM_I915_PERF_PROP_OA_FORMAT, oa_unit_default_format(), + DRM_I915_PERF_PROP_OA_FORMAT, oa_unit_default_format(e), DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent, DRM_I915_PERF_PROP_OA_ENGINE_CLASS, e->class, DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE, e->instance, @@ -4668,7 +4707,7 @@ test_global_sseu_config_invalid(const intel_ctx_t *ctx, const struct intel_execu /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set, - DRM_I915_PERF_PROP_OA_FORMAT, oa_unit_default_format(), + DRM_I915_PERF_PROP_OA_FORMAT, oa_unit_default_format(e), DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec, DRM_I915_PERF_PROP_GLOBAL_SSEU, to_user_pointer(&sseu_param), DRM_I915_PERF_PROP_OA_ENGINE_CLASS, e->class, @@ -4758,7 +4797,7 @@ test_global_sseu_config(const intel_ctx_t *ctx, const struct intel_execution_eng /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set, - DRM_I915_PERF_PROP_OA_FORMAT, oa_unit_default_format(), + DRM_I915_PERF_PROP_OA_FORMAT, oa_unit_default_format(e), DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec, DRM_I915_PERF_PROP_GLOBAL_SSEU, to_user_pointer(&sseu_param), DRM_I915_PERF_PROP_OA_ENGINE_CLASS, e->class, @@ -5295,10 +5334,27 @@ test_sysctl_defaults(void) igt_assert_eq(max_freq, 100000); } -#define __for_each_perf_enabled_engine(fd__, e__) \ - for_each_physical_engine(fd__, e__) \ - if (perf_supports_engine(e__)) \ - igt_dynamic_f("%s", e__->name) +static struct intel_execution_engine2 * +__ci_to_e2(const intel_ctx_t *ctx, struct i915_engine_class_instance *ci) +{ + static struct intel_execution_engine2 e2; + struct intel_execution_engine2 *e; + + for_each_ctx_engine(drm_fd, ctx, e) { + if (e->class == ci->engine_class && e->instance == ci->engine_instance) { + e2 = *e; + break; + } + } + + return &e2; +} + +#define __for_random_engine_in_each_group(groups_, ctx_, e_) \ + for (int i_ = 0; \ + i_ < num_perf_oa_groups && !!(e_ = __ci_to_e2(ctx_, random_engine(&groups_[i_]))); \ + i_++) \ + igt_dynamic_f("%d-%s", i_, e_->name) #define __for_each_render_engine(fd__, e__) \ for_each_physical_engine(fd__, e__) \ @@ -5676,7 +5732,7 @@ igt_main test_missing_sample_flags(); igt_subtest_with_dynamic("oa-formats") - __for_each_perf_enabled_engine(drm_fd, e) + __for_random_engine_in_each_group(perf_oa_groups, ctx, e) test_oa_formats(e); igt_subtest("invalid-oa-exponent") @@ -5684,7 +5740,7 @@ igt_main igt_subtest("low-oa-exponent-permissions") test_low_oa_exponent_permissions(); igt_subtest_with_dynamic("oa-exponents") - __for_each_perf_enabled_engine(drm_fd, e) + __for_random_engine_in_each_group(perf_oa_groups, ctx, e) test_oa_exponents(e); igt_subtest("per-context-mode-unprivileged") { @@ -5693,7 +5749,7 @@ igt_main } igt_subtest_with_dynamic("buffer-fill") - __for_each_perf_enabled_engine(drm_fd, e) + __for_random_engine_in_each_group(perf_oa_groups, ctx, e) test_buffer_fill(e); igt_describe("Test that reason field in OA reports is never 0 on Gen8+"); @@ -5709,12 +5765,12 @@ igt_main test_non_sampling_read_error(); igt_subtest_with_dynamic("enable-disable") - __for_each_perf_enabled_engine(drm_fd, e) + __for_random_engine_in_each_group(perf_oa_groups, ctx, e) test_enable_disable(e); igt_describe("Test blocking read with default hrtimer frequency"); igt_subtest_with_dynamic("blocking") { - __for_each_perf_enabled_engine(drm_fd, e) + __for_random_engine_in_each_group(perf_oa_groups, ctx, e) test_blocking(40 * 1000 * 1000 /* 40ms oa period */, false /* set_kernel_hrtimer */, 5 * 1000 * 1000 /* default 5ms/200Hz hrtimer */, @@ -5742,7 +5798,7 @@ igt_main igt_describe("Test polled read with default hrtimer frequency"); igt_subtest_with_dynamic("polling") { - __for_each_perf_enabled_engine(drm_fd, e) + __for_random_engine_in_each_group(perf_oa_groups, ctx, e) test_polling(40 * 1000 * 1000 /* 40ms oa period */, false /* set_kernel_hrtimer */, 5 * 1000 * 1000 /* default 5ms/200Hz hrtimer */, @@ -5812,7 +5868,7 @@ igt_main igt_describe("Test OA TLB invalidate"); igt_subtest_with_dynamic("gen12-oa-tlb-invalidate") - __for_each_perf_enabled_engine(drm_fd, e) + __for_random_engine_in_each_group(perf_oa_groups, ctx, e) gen12_test_oa_tlb_invalidate(e); igt_describe("Measure performance for a specific context using OAR in Gen 12"); @@ -5857,7 +5913,7 @@ igt_main igt_describe("Stress tests opening & closing the i915-perf stream in a busy loop"); igt_subtest_with_dynamic("stress-open-close") - __for_each_perf_enabled_engine(drm_fd, e) + __for_random_engine_in_each_group(perf_oa_groups, ctx, e) test_stress_open_close(e); igt_subtest_group { @@ -5868,12 +5924,12 @@ igt_main igt_describe("Verify invalid SSEU opening parameters"); igt_subtest_with_dynamic("global-sseu-config-invalid") - __for_each_perf_enabled_engine(drm_fd, e) + __for_random_engine_in_each_group(perf_oa_groups, ctx, e) test_global_sseu_config_invalid(ctx, e); igt_describe("Verify specifying SSEU opening parameters"); igt_subtest_with_dynamic("global-sseu-config") - __for_each_perf_enabled_engine(drm_fd, e) + __for_random_engine_in_each_group(perf_oa_groups, ctx, e) test_global_sseu_config(ctx, e); } -- 2.36.1