From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
Received: from mgamail.intel.com (mgamail.intel.com [134.134.136.126])
 by gabe.freedesktop.org (Postfix) with ESMTPS id 95F4510E021
 for ; Mon, 14 Aug 2023 22:43:04 +0000 (UTC)
From: Umesh Nerlige Ramappa
To: igt-dev@lists.freedesktop.org
Date: Mon, 14 Aug 2023 15:43:03 -0700
Message-Id: <20230814224303.375833-1-umesh.nerlige.ramappa@intel.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Subject: [igt-dev] [PATCH i-g-t v2] tests/xe/xe_query: Add a test for querying engine cycles
List-Unsubscribe: ,
List-Archive:
List-Post:
List-Help:
List-Subscribe: ,
Cc: Lionel G Landwerlin
Errors-To: igt-dev-bounces@lists.freedesktop.org
Sender: "igt-dev"
List-ID:

The DRM_XE_QUERY_CS_CYCLES query provides a way for the user to obtain
CPU and GPU timestamps as close to each other as possible.

Add a test to query engine cycles and GPU/CPU time correlation as well
as validate the parameters.

v2:
- Update the test for kernel uapi changes

Signed-off-by: Umesh Nerlige Ramappa
---
 include/drm-uapi/xe_drm.h |  92 ++++++++++++++----
 tests/xe/xe_query.c       | 212 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 286 insertions(+), 18 deletions(-)

diff --git a/include/drm-uapi/xe_drm.h b/include/drm-uapi/xe_drm.h
index d1d49cd71..5318c299c 100644
--- a/include/drm-uapi/xe_drm.h
+++ b/include/drm-uapi/xe_drm.h
@@ -128,6 +128,25 @@ struct xe_user_extension {
 #define DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
 #define DRM_IOCTL_XE_VM_MADVISE			 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise)
 
+/** struct drm_xe_engine_class_instance - instance of an engine class */
+struct drm_xe_engine_class_instance {
+#define DRM_XE_ENGINE_CLASS_RENDER		0
+#define DRM_XE_ENGINE_CLASS_COPY		1
+#define DRM_XE_ENGINE_CLASS_VIDEO_DECODE	2
+#define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE	3
+#define DRM_XE_ENGINE_CLASS_COMPUTE		4
+	/*
+	 * Kernel only class (not actual hardware engine class). Used for
+	 * creating ordered queues of VM bind operations.
+	 */
+#define DRM_XE_ENGINE_CLASS_VM_BIND		5
+	__u16 engine_class;
+
+	__u16 engine_instance;
+	__u16 gt_id;
+	__u16 rsvd;
+};
+
 /**
  * enum drm_xe_memory_class - Supported memory classes.
  */
@@ -223,6 +242,60 @@ struct drm_xe_query_mem_region {
 	__u64 reserved[6];
 };
 
+/**
+ * struct drm_xe_query_engine_cycles - correlate CPU and GPU timestamps
+ *
+ * If a query is made with a struct drm_xe_device_query where .query is equal to
+ * DRM_XE_QUERY_CS_CYCLES, then the reply uses struct drm_xe_query_engine_cycles
+ * in .data. struct drm_xe_query_engine_cycles is allocated by the user and
+ * .data points to this allocated structure.
+ *
+ * The query returns the command streamer cycles and the frequency that can
+ * be used to calculate the command streamer timestamp. In addition the
+ * query returns a set of cpu timestamps that indicate when the command
+ * streamer cycle count was captured.
+ */
+struct drm_xe_query_engine_cycles {
+	/**
+	 * @eci: This is input by the user and is the engine for which command
+	 * streamer cycles is queried.
+	 */
+	struct drm_xe_engine_class_instance eci;
+
+	/**
+	 * @clockid: This is input by the user and is the reference clock id for
+	 * CPU timestamp. For definition, see clock_gettime(2) and
+	 * perf_event_open(2). Supported clock ids are CLOCK_MONOTONIC,
+	 * CLOCK_MONOTONIC_RAW, CLOCK_REALTIME, CLOCK_BOOTTIME, CLOCK_TAI.
+	 */
+	__s32 clockid;
+
+	/** @width: Width of the engine cycle counter in bits. */
+	__u32 width;
+
+	/**
+	 * @engine_cycles: Command streamer cycles as read from the command streamer
+	 * register at 0x358 offset.
+	 */
+	__u64 engine_cycles;
+
+	/** @engine_frequency: Frequency of the engine cycles in Hz. */
+	__u64 engine_frequency;
+
+	/**
+	 * @cpu_timestamp: CPU timestamp in ns. The timestamp is captured before
+	 * reading the engine_cycles register using the reference clockid set by the
+	 * user.
+	 */
+	__u64 cpu_timestamp;
+
+	/**
+	 * @cpu_delta: Time delta in ns captured around reading the lower dword
+	 * of the engine_cycles register.
+	 */
+	__u64 cpu_delta;
+};
+
 /**
  * struct drm_xe_query_mem_usage - describe memory regions and usage
  *
@@ -395,6 +468,7 @@ struct drm_xe_device_query {
 #define DRM_XE_DEVICE_QUERY_GTS		3
 #define DRM_XE_DEVICE_QUERY_HWCONFIG	4
 #define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY	5
+#define DRM_XE_QUERY_CS_CYCLES		6
 	/** @query: The type of data to query */
 	__u32 query;
 
@@ -737,24 +811,6 @@ struct drm_xe_exec_queue_set_property {
 	__u64 reserved[2];
 };
 
-/** struct drm_xe_engine_class_instance - instance of an engine class */
-struct drm_xe_engine_class_instance {
-#define DRM_XE_ENGINE_CLASS_RENDER		0
-#define DRM_XE_ENGINE_CLASS_COPY		1
-#define DRM_XE_ENGINE_CLASS_VIDEO_DECODE	2
-#define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE	3
-#define DRM_XE_ENGINE_CLASS_COMPUTE		4
-	/*
-	 * Kernel only class (not actual hardware engine class). Used for
-	 * creating ordered queues of VM bind operations.
-	 */
-#define DRM_XE_ENGINE_CLASS_VM_BIND		5
-	__u16 engine_class;
-
-	__u16 engine_instance;
-	__u16 gt_id;
-};
-
 struct drm_xe_exec_queue_create {
 #define XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY               0
 	/** @extensions: Pointer to the first extension struct, if any */
diff --git a/tests/xe/xe_query.c b/tests/xe/xe_query.c
index a4e40afdd..7b289521f 100644
--- a/tests/xe/xe_query.c
+++ b/tests/xe/xe_query.c
@@ -468,6 +468,212 @@ test_query_invalid_extension(int fd)
 	do_ioctl_err(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query, EINVAL);
 }
 
+/* Returns true if a zero-sized DRM_XE_QUERY_CS_CYCLES query ioctl succeeds. */
+static bool
+query_engine_cycles_supported(int fd)
+{
+	struct drm_xe_device_query query = {
+		.extensions = 0,
+		.query = DRM_XE_QUERY_CS_CYCLES,
+		.size = 0,
+		.data = 0,
+	};
+
+	return igt_ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query) == 0;
+}
+
+/*
+ * Issue DRM_XE_QUERY_CS_CYCLES with @resp as the data buffer. The caller
+ * fills in resp->eci and resp->clockid before calling.
+ */
+static void
+query_engine_cycles(int fd, struct drm_xe_query_engine_cycles *resp)
+{
+	struct drm_xe_device_query query = {
+		.extensions = 0,
+		.query = DRM_XE_QUERY_CS_CYCLES,
+		.size = sizeof(*resp),
+		.data = to_user_pointer(resp),
+	};
+
+	do_ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
+	igt_assert(query.size);
+}
+
+/*
+ * With a spinner running on @hwe, take pairs of back-to-back engine cycle
+ * snapshots for each clockid and compare the elapsed CPU time with the elapsed
+ * engine time. Out of every NUM_SNAPSHOTS pairs, more than two must agree to
+ * within 5000 ns.
+ */
+static void
+__engine_cycles(int fd, struct drm_xe_engine_class_instance *hwe)
+{
+	struct drm_xe_query_engine_cycles ts1 = {};
+	struct drm_xe_query_engine_cycles ts2 = {};
+	uint64_t delta_cpu, delta_engine, delta_delta;
+	unsigned int exec_queue;
+	int i, usable = 0;
+	igt_spin_t *spin;
+	uint64_t ahnd;
+	uint32_t vm;
+	struct {
+		int32_t id;
+		const char *name;
+	} clock[] = {
+		{ CLOCK_MONOTONIC, "CLOCK_MONOTONIC" },
+		{ CLOCK_MONOTONIC_RAW, "CLOCK_MONOTONIC_RAW" },
+		{ CLOCK_REALTIME, "CLOCK_REALTIME" },
+		{ CLOCK_BOOTTIME, "CLOCK_BOOTTIME" },
+		{ CLOCK_TAI, "CLOCK_TAI" },
+	};
+
+	igt_debug("engine[%u:%u]\n",
+		  hwe->engine_class,
+		  hwe->engine_instance);
+
+	vm = xe_vm_create(fd, 0, 0);
+	exec_queue = xe_exec_queue_create(fd, vm, hwe, 0);
+	ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_RELOC);
+	spin = igt_spin_new(fd, .ahnd = ahnd, .engine = exec_queue, .vm = vm);
+
+	/* Try a new clock every 10 iterations. */
+#define NUM_SNAPSHOTS 10
+	for (i = 0; i < NUM_SNAPSHOTS * ARRAY_SIZE(clock); i++) {
+		int index = i / NUM_SNAPSHOTS;
+
+		ts1.eci = *hwe;
+		ts1.clockid = clock[index].id;
+
+		ts2.eci = *hwe;
+		ts2.clockid = clock[index].id;
+
+		query_engine_cycles(fd, &ts1);
+		query_engine_cycles(fd, &ts2);
+
+		igt_debug("[1] cpu_ts before %llu, reg read time %llu\n",
+			  ts1.cpu_timestamp,
+			  ts1.cpu_delta);
+		igt_debug("[1] engine_ts %llu, freq %llu Hz, width %u\n",
+			  ts1.engine_cycles, ts1.engine_frequency, ts1.width);
+
+		igt_debug("[2] cpu_ts before %llu, reg read time %llu\n",
+			  ts2.cpu_timestamp,
+			  ts2.cpu_delta);
+		igt_debug("[2] engine_ts %llu, freq %llu Hz, width %u\n",
+			  ts2.engine_cycles, ts2.engine_frequency, ts2.width);
+
+		delta_cpu = ts2.cpu_timestamp - ts1.cpu_timestamp;
+
+		/*
+		 * The counter is only ts2.width bits wide, so take the
+		 * difference modulo 2^width: that gives the elapsed cycles
+		 * even if the counter wrapped between the two reads. Shift a
+		 * 64-bit one; an int shift is undefined for width >= 31.
+		 */
+		igt_assert(ts1.engine_frequency);
+		delta_engine = ts2.engine_cycles - ts1.engine_cycles;
+		if (ts2.width < 64)
+			delta_engine &= (1ull << ts2.width) - 1;
+		delta_engine = delta_engine * NSEC_PER_SEC / ts1.engine_frequency;
+
+		igt_debug("delta_cpu[%llu], delta_engine[%llu]\n",
+			  (unsigned long long)delta_cpu,
+			  (unsigned long long)delta_engine);
+
+		delta_delta = delta_cpu > delta_engine ?
+			       delta_cpu - delta_engine :
+			       delta_engine - delta_cpu;
+		igt_debug("delta_delta %llu\n", (unsigned long long)delta_delta);
+
+		if (delta_delta < 5000)
+			usable++;
+
+		/*
+		 * User needs few good snapshots of the timestamps to
+		 * synchronize cpu time with engine time. Check if we have enough
+		 * usable values before moving to the next clockid.
+		 */
+		if (!((i + 1) % NUM_SNAPSHOTS)) {
+			igt_debug("clock %s\n", clock[index].name);
+			igt_debug("usable %d\n", usable);
+			igt_assert(usable > 2);
+			usable = 0;
+		}
+	}
+
+	igt_spin_free(fd, spin);
+	xe_exec_queue_destroy(fd, exec_queue);
+	xe_vm_destroy(fd, vm);
+	put_ahnd(ahnd);
+}
+
+/**
+ * SUBTEST: query-engine-cycles
+ * Description: Query CPU-GPU timestamp correlation
+ */
+static void test_query_engine_cycles(int fd)
+{
+	struct drm_xe_engine_class_instance *hwe;
+
+	igt_require(query_engine_cycles_supported(fd));
+
+	xe_for_each_hw_engine(fd, hwe) {
+		igt_assert(hwe);
+		__engine_cycles(fd, hwe);
+	}
+}
+
+/**
+ * SUBTEST: query-invalid-engine-cycles
+ * Description: Check query with invalid arguments returns expected error code.
+ */
+static void test_engine_cycles_invalid(int fd)
+{
+	struct drm_xe_engine_class_instance *hwe;
+	struct drm_xe_query_engine_cycles ts = {};
+	struct drm_xe_device_query query = {
+		.extensions = 0,
+		.query = DRM_XE_QUERY_CS_CYCLES,
+		.size = sizeof(ts),
+		.data = to_user_pointer(&ts),
+	};
+
+	igt_require(query_engine_cycles_supported(fd));
+
+	/* get one engine */
+	xe_for_each_hw_engine(fd, hwe)
+		break;
+
+	/* sanity check engine selection is valid */
+	ts.eci = *hwe;
+	query_engine_cycles(fd, &ts);
+
+	/* bad instance */
+	ts.eci = *hwe;
+	ts.eci.engine_instance = 0xffff;
+	do_ioctl_err(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query, EINVAL);
+	ts.eci = *hwe;
+
+	/* bad class */
+	ts.eci.engine_class = 0xffff;
+	do_ioctl_err(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query, EINVAL);
+	ts.eci = *hwe;
+
+	/* bad gt */
+	ts.eci.gt_id = 0xffff;
+	do_ioctl_err(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query, EINVAL);
+	ts.eci = *hwe;
+
+	/* bad clockid */
+	ts.clockid = -1;
+	do_ioctl_err(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query, EINVAL);
+	ts.clockid = 0;
+
+	/* sanity check */
+	query_engine_cycles(fd, &ts);
+}
+
 igt_main
 {
 	int xe;
@@ -493,6 +699,12 @@ igt_main
 	igt_subtest("query-topology")
 		test_query_gt_topology(xe);
 
+	igt_subtest("query-engine-cycles")
+		test_query_engine_cycles(xe);
+
+	igt_subtest("query-invalid-engine-cycles")
+		test_engine_cycles_invalid(xe);
+
 	igt_subtest("query-invalid-query")
 		test_query_invalid_query(xe);
 
-- 
2.38.1