From: Rodrigo Vivi <rodrigo.vivi@intel.com>
To: Francois Dugast <francois.dugast@intel.com>
Cc: igt-dev@lists.freedesktop.org
Subject: Re: [igt-dev] [PATCH v4 02/14] tests/intel/xe_query: Add a test for querying engine cycles
Date: Thu, 28 Sep 2023 10:33:26 -0400 [thread overview]
Message-ID: <ZRWOtjYmxZ8LkNG6@intel.com> (raw)
In-Reply-To: <20230928110516.7-3-francois.dugast@intel.com>
On Thu, Sep 28, 2023 at 11:05:04AM +0000, Francois Dugast wrote:
> From: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>
> The DRM_XE_QUERY_ENGINE_CYCLES query provides a way for the user to obtain
> CPU and GPU timestamps as close to each other as possible.
>
> Add a test to query engine cycles and GPU/CPU time correlation as well as
> validate the parameters.
>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> Signed-off-by: Francois Dugast <francois.dugast@intel.com>
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
> [Rodrigo rebased after s/cs/engine]
While fixing the naming here and on the kernel side, I became confident
that this is the right test for that uAPI and that the patch is correct:
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
> ---
> include/drm-uapi/xe_drm.h | 104 +++++++++++++++-----
> tests/intel/xe_query.c | 195 ++++++++++++++++++++++++++++++++++++++
> 2 files changed, 275 insertions(+), 24 deletions(-)
>
> diff --git a/include/drm-uapi/xe_drm.h b/include/drm-uapi/xe_drm.h
> index 13cd6a73d..8a702e6f4 100644
> --- a/include/drm-uapi/xe_drm.h
> +++ b/include/drm-uapi/xe_drm.h
> @@ -128,6 +128,25 @@ struct xe_user_extension {
> #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
> #define DRM_IOCTL_XE_VM_MADVISE DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise)
>
> +/** struct drm_xe_engine_class_instance - instance of an engine class */
> +struct drm_xe_engine_class_instance {
> +#define DRM_XE_ENGINE_CLASS_RENDER 0
> +#define DRM_XE_ENGINE_CLASS_COPY 1
> +#define DRM_XE_ENGINE_CLASS_VIDEO_DECODE 2
> +#define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE 3
> +#define DRM_XE_ENGINE_CLASS_COMPUTE 4
> + /*
> + * Kernel only class (not actual hardware engine class). Used for
> + * creating ordered queues of VM bind operations.
> + */
> +#define DRM_XE_ENGINE_CLASS_VM_BIND 5
> + __u16 engine_class;
> +
> + __u16 engine_instance;
> + __u16 gt_id;
> + __u16 rsvd;
> +};
> +
> /**
> * enum drm_xe_memory_class - Supported memory classes.
> */
> @@ -219,6 +238,60 @@ struct drm_xe_query_mem_region {
> __u64 reserved[6];
> };
>
> +/**
> + * struct drm_xe_query_engine_cycles - correlate CPU and GPU timestamps
> + *
> + * If a query is made with a struct drm_xe_device_query where .query is equal to
> + * DRM_XE_DEVICE_QUERY_ENGINE_CYCLES, then the reply uses struct drm_xe_query_engine_cycles
> + * in .data. struct drm_xe_query_engine_cycles is allocated by the user and
> + * .data points to this allocated structure.
> + *
> + * The query returns the engine cycles and the frequency that can
> + * be used to calculate the engine timestamp. In addition the
> + * query returns a set of cpu timestamps that indicate when the command
> + * streamer cycle count was captured.
> + */
> +struct drm_xe_query_engine_cycles {
> + /**
> + * @eci: This is input by the user and is the engine for which command
> + * streamer cycles is queried.
> + */
> + struct drm_xe_engine_class_instance eci;
> +
> + /**
> + * @clockid: This is input by the user and is the reference clock id for
> + * CPU timestamp. For definition, see clock_gettime(2) and
> + * perf_event_open(2). Supported clock ids are CLOCK_MONOTONIC,
> + * CLOCK_MONOTONIC_RAW, CLOCK_REALTIME, CLOCK_BOOTTIME, CLOCK_TAI.
> + */
> + __s32 clockid;
> +
> + /** @width: Width of the engine cycle counter in bits. */
> + __u32 width;
> +
> + /**
> + * @engine_cycles: Engine cycles as read from its register
> + * at 0x358 offset.
> + */
> + __u64 engine_cycles;
> +
> + /** @engine_frequency: Frequency of the engine cycles in Hz. */
> + __u64 engine_frequency;
> +
> + /**
> + * @cpu_timestamp: CPU timestamp in ns. The timestamp is captured before
> + * reading the engine_cycles register using the reference clockid set by the
> + * user.
> + */
> + __u64 cpu_timestamp;
> +
> + /**
> + * @cpu_delta: Time delta in ns captured around reading the lower dword
> + * of the engine_cycles register.
> + */
> + __u64 cpu_delta;
> +};
> +
> /**
> * struct drm_xe_query_mem_usage - describe memory regions and usage
> *
> @@ -385,12 +458,13 @@ struct drm_xe_device_query {
> /** @extensions: Pointer to the first extension struct, if any */
> __u64 extensions;
>
> -#define DRM_XE_DEVICE_QUERY_ENGINES 0
> -#define DRM_XE_DEVICE_QUERY_MEM_USAGE 1
> -#define DRM_XE_DEVICE_QUERY_CONFIG 2
> -#define DRM_XE_DEVICE_QUERY_GTS 3
> -#define DRM_XE_DEVICE_QUERY_HWCONFIG 4
> -#define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY 5
> +#define DRM_XE_DEVICE_QUERY_ENGINES 0
> +#define DRM_XE_DEVICE_QUERY_MEM_USAGE 1
> +#define DRM_XE_DEVICE_QUERY_CONFIG 2
> +#define DRM_XE_DEVICE_QUERY_GTS 3
> +#define DRM_XE_DEVICE_QUERY_HWCONFIG 4
> +#define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY 5
> +#define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES 6
> /** @query: The type of data to query */
> __u32 query;
>
> @@ -732,24 +806,6 @@ struct drm_xe_exec_queue_set_property {
> __u64 reserved[2];
> };
>
> -/** struct drm_xe_engine_class_instance - instance of an engine class */
> -struct drm_xe_engine_class_instance {
> -#define DRM_XE_ENGINE_CLASS_RENDER 0
> -#define DRM_XE_ENGINE_CLASS_COPY 1
> -#define DRM_XE_ENGINE_CLASS_VIDEO_DECODE 2
> -#define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE 3
> -#define DRM_XE_ENGINE_CLASS_COMPUTE 4
> - /*
> - * Kernel only class (not actual hardware engine class). Used for
> - * creating ordered queues of VM bind operations.
> - */
> -#define DRM_XE_ENGINE_CLASS_VM_BIND 5
> - __u16 engine_class;
> -
> - __u16 engine_instance;
> - __u16 gt_id;
> -};
> -
> struct drm_xe_exec_queue_create {
> #define XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0
> /** @extensions: Pointer to the first extension struct, if any */
> diff --git a/tests/intel/xe_query.c b/tests/intel/xe_query.c
> index 5966968d3..3e7460ff4 100644
> --- a/tests/intel/xe_query.c
> +++ b/tests/intel/xe_query.c
> @@ -476,6 +476,195 @@ test_query_invalid_extension(int fd)
> do_ioctl_err(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query, EINVAL);
> }
>
> +static bool
> +query_engine_cycles_supported(int fd)
> +{
> + struct drm_xe_device_query query = {
> + .extensions = 0,
> + .query = DRM_XE_DEVICE_QUERY_ENGINE_CYCLES,
> + .size = 0,
> + .data = 0,
> + };
> +
> + return igt_ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query) == 0;
> +}
> +
> +static void
> +query_engine_cycles(int fd, struct drm_xe_query_engine_cycles *resp)
> +{
> + struct drm_xe_device_query query = {
> + .extensions = 0,
> + .query = DRM_XE_DEVICE_QUERY_ENGINE_CYCLES,
> + .size = sizeof(*resp),
> + .data = to_user_pointer(resp),
> + };
> +
> + do_ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
> + igt_assert(query.size);
> +}
> +
> +static void
> +__engine_cycles(int fd, struct drm_xe_engine_class_instance *hwe)
> +{
> + struct drm_xe_query_engine_cycles ts1 = {};
> + struct drm_xe_query_engine_cycles ts2 = {};
> + uint64_t delta_cpu, delta_cs, delta_delta;
> + unsigned int exec_queue;
> + int i, usable = 0;
> + igt_spin_t *spin;
> + uint64_t ahnd;
> + uint32_t vm;
> + struct {
> + int32_t id;
> + const char *name;
> + } clock[] = {
> + { CLOCK_MONOTONIC, "CLOCK_MONOTONIC" },
> + { CLOCK_MONOTONIC_RAW, "CLOCK_MONOTONIC_RAW" },
> + { CLOCK_REALTIME, "CLOCK_REALTIME" },
> + { CLOCK_BOOTTIME, "CLOCK_BOOTTIME" },
> + { CLOCK_TAI, "CLOCK_TAI" },
> + };
> +
> + igt_debug("engine[%u:%u]\n",
> + hwe->engine_class,
> + hwe->engine_instance);
> +
> + vm = xe_vm_create(fd, 0, 0);
> + exec_queue = xe_exec_queue_create(fd, vm, hwe, 0);
> + ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_RELOC);
> + spin = igt_spin_new(fd, .ahnd = ahnd, .engine = exec_queue, .vm = vm);
> +
> + /* Try a new clock every 10 iterations. */
> +#define NUM_SNAPSHOTS 10
> + for (i = 0; i < NUM_SNAPSHOTS * ARRAY_SIZE(clock); i++) {
> + int index = i / NUM_SNAPSHOTS;
> +
> + ts1.eci = *hwe;
> + ts1.clockid = clock[index].id;
> +
> + ts2.eci = *hwe;
> + ts2.clockid = clock[index].id;
> +
> + query_engine_cycles(fd, &ts1);
> + query_engine_cycles(fd, &ts2);
> +
> + igt_debug("[1] cpu_ts before %llu, reg read time %llu\n",
> + ts1.cpu_timestamp,
> + ts1.cpu_delta);
> + igt_debug("[1] engine_ts %llu, freq %llu Hz, width %u\n",
> + ts1.engine_cycles, ts1.engine_frequency, ts1.width);
> +
> + igt_debug("[2] cpu_ts before %llu, reg read time %llu\n",
> + ts2.cpu_timestamp,
> + ts2.cpu_delta);
> + igt_debug("[2] engine_ts %llu, freq %llu Hz, width %u\n",
> + ts2.engine_cycles, ts2.engine_frequency, ts2.width);
> +
> + delta_cpu = ts2.cpu_timestamp - ts1.cpu_timestamp;
> +
> + if (ts2.engine_cycles >= ts1.engine_cycles)
> + delta_cs = (ts2.engine_cycles - ts1.engine_cycles) *
> + NSEC_PER_SEC / ts1.engine_frequency;
> + else
> + delta_cs = (((1 << ts2.width) - ts2.engine_cycles) + ts1.engine_cycles) *
> + NSEC_PER_SEC / ts1.engine_frequency;
> +
> + igt_debug("delta_cpu[%lu], delta_cs[%lu]\n",
> + delta_cpu, delta_cs);
> +
> + delta_delta = delta_cpu > delta_cs ?
> + delta_cpu - delta_cs :
> + delta_cs - delta_cpu;
> + igt_debug("delta_delta %lu\n", delta_delta);
> +
> + if (delta_delta < 5000)
> + usable++;
> +
> + /*
> + * User needs few good snapshots of the timestamps to
> + * synchronize cpu time with cs time. Check if we have enough
> + * usable values before moving to the next clockid.
> + */
> + if (!((i + 1) % NUM_SNAPSHOTS)) {
> + igt_debug("clock %s\n", clock[index].name);
> + igt_debug("usable %d\n", usable);
> + igt_assert(usable > 2);
> + usable = 0;
> + }
> + }
> +
> + igt_spin_free(fd, spin);
> + xe_exec_queue_destroy(fd, exec_queue);
> + xe_vm_destroy(fd, vm);
> + put_ahnd(ahnd);
> +}
> +
> +/**
> + * SUBTEST: query-cs-cycles
> + * Description: Query CPU-GPU timestamp correlation
> + */
> +static void test_query_engine_cycles(int fd)
> +{
> + struct drm_xe_engine_class_instance *hwe;
> +
> + igt_require(query_engine_cycles_supported(fd));
> +
> + xe_for_each_hw_engine(fd, hwe) {
> + igt_assert(hwe);
> + __engine_cycles(fd, hwe);
> + }
> +}
> +
> +/**
> + * SUBTEST: query-invalid-cs-cycles
> + * Description: Check query with invalid arguments returns expected error code.
> + */
> +static void test_engine_cycles_invalid(int fd)
> +{
> + struct drm_xe_engine_class_instance *hwe;
> + struct drm_xe_query_engine_cycles ts = {};
> + struct drm_xe_device_query query = {
> + .extensions = 0,
> + .query = DRM_XE_DEVICE_QUERY_ENGINE_CYCLES,
> + .size = sizeof(ts),
> + .data = to_user_pointer(&ts),
> + };
> +
> + igt_require(query_engine_cycles_supported(fd));
> +
> + /* get one engine */
> + xe_for_each_hw_engine(fd, hwe)
> + break;
> +
> + /* sanity check engine selection is valid */
> + ts.eci = *hwe;
> + query_engine_cycles(fd, &ts);
> +
> + /* bad instance */
> + ts.eci = *hwe;
> + ts.eci.engine_instance = 0xffff;
> + do_ioctl_err(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query, EINVAL);
> + ts.eci = *hwe;
> +
> + /* bad class */
> + ts.eci.engine_class = 0xffff;
> + do_ioctl_err(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query, EINVAL);
> + ts.eci = *hwe;
> +
> + /* bad gt */
> + ts.eci.gt_id = 0xffff;
> + do_ioctl_err(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query, EINVAL);
> + ts.eci = *hwe;
> +
> + /* bad clockid */
> + ts.clockid = -1;
> + do_ioctl_err(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query, EINVAL);
> + ts.clockid = 0;
> +
> + /* sanity check */
> + query_engine_cycles(fd, &ts);
> +}
> +
> igt_main
> {
> int xe;
> @@ -501,6 +690,12 @@ igt_main
> igt_subtest("query-topology")
> test_query_gt_topology(xe);
>
> + igt_subtest("query-cs-cycles")
> + test_query_engine_cycles(xe);
> +
> + igt_subtest("query-invalid-cs-cycles")
> + test_engine_cycles_invalid(xe);
> +
> igt_subtest("query-invalid-query")
> test_query_invalid_query(xe);
>
> --
> 2.34.1
>
next prev parent reply other threads:[~2023-09-28 14:35 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-09-28 11:05 [igt-dev] [PATCH v4 00/14] uAPI Alignment - take 1 v4 Francois Dugast
2023-09-28 11:05 ` [igt-dev] [PATCH v4 01/14] drm-uapi/xe_drm: Align with new PMU interface Francois Dugast
2023-09-28 11:33 ` Francois Dugast
2023-09-28 11:05 ` [igt-dev] [PATCH v4 02/14] tests/intel/xe_query: Add a test for querying engine cycles Francois Dugast
2023-09-28 14:33 ` Rodrigo Vivi [this message]
2023-09-28 11:05 ` [igt-dev] [PATCH v4 03/14] drm-uapi/xe_drm: Separate VM_BIND's operation and flag, align with latest uapi Francois Dugast
2023-09-28 11:05 ` [igt-dev] [PATCH v4 04/14] drm-uapi/xe_drm: Remove MMIO ioctl and " Francois Dugast
2023-09-28 14:36 ` Rodrigo Vivi
2023-09-28 11:05 ` [igt-dev] [PATCH v4 05/14] xe_exec_balancer: Enable parallel submission and compute mode Francois Dugast
2023-09-29 16:27 ` Souza, Jose
2023-09-28 11:05 ` [igt-dev] [PATCH v4 06/14] xe_exec_threads: Use DRM_XE_VM_CREATE_COMPUTE_MODE when creating a compute VM Francois Dugast
2023-09-28 11:05 ` [igt-dev] [PATCH v4 07/14] xe: Update uAPI and remove XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE Francois Dugast
2023-09-28 11:05 ` [igt-dev] [PATCH v4 08/14] drm-uapi/xe: Use common drm_xe_ext_set_property extension Francois Dugast
2023-09-28 12:19 ` Francois Dugast
2023-09-28 11:05 ` [igt-dev] [PATCH v4 09/14] drm-uapi: Kill XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS extension Francois Dugast
2023-09-28 13:36 ` Francois Dugast
2023-09-28 11:05 ` [igt-dev] [PATCH v4 10/14] xe: Update to new VM bind uAPI Francois Dugast
2023-09-29 16:32 ` Souza, Jose
2023-10-03 9:35 ` Francois Dugast
2023-10-03 14:25 ` Souza, Jose
2023-09-28 11:05 ` [igt-dev] [PATCH v4 11/14] drm-uapi/xe: Replace useless 'instance' per unique gt_id Francois Dugast
2023-09-28 12:00 ` Francois Dugast
2023-09-28 11:05 ` [igt-dev] [PATCH v4 12/14] drm-uapi/xe: Remove unused field of drm_xe_query_gt Francois Dugast
2023-09-28 11:25 ` Francois Dugast
2023-09-28 11:05 ` [igt-dev] [PATCH v4 13/14] drm-uapi/xe: Rename gts to gt_list Francois Dugast
2023-09-28 12:07 ` Francois Dugast
2023-09-28 11:05 ` [igt-dev] [PATCH v4 14/14] drm-uapi/xe: Fix naming of XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY Francois Dugast
2023-09-28 11:27 ` Francois Dugast
2023-09-28 12:11 ` [igt-dev] ✗ CI.xeBAT: failure for uAPI Alignment - take 1 (rev3) Patchwork
2023-09-28 12:15 ` [igt-dev] ✓ Fi.CI.BAT: success " Patchwork
2023-09-28 23:36 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ZRWOtjYmxZ8LkNG6@intel.com \
--to=rodrigo.vivi@intel.com \
--cc=francois.dugast@intel.com \
--cc=igt-dev@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.