From mboxrd@z Thu Jan 1 00:00:00 1970
Received: from mgamail.intel.com (mgamail.intel.com [192.55.52.43])
 by gabe.freedesktop.org (Postfix) with ESMTPS id B93A210E59B;
 Fri, 20 Oct 2023 09:38:37 +0000 (UTC)
From: Matthew Auld
To: igt-dev@lists.freedesktop.org
Date: Fri, 20 Oct 2023 10:38:00 +0100
Message-ID: <20231020093801.631809-15-matthew.auld@intel.com>
In-Reply-To: <20231020093801.631809-1-matthew.auld@intel.com>
References: <20231020093801.631809-1-matthew.auld@intel.com>
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Subject: [igt-dev] [PATCH i-g-t v5 14/15] tests/xe: add some vm_bind pat_index tests
Cc: Nitish Kumar
Errors-To: igt-dev-bounces@lists.freedesktop.org
Sender: "igt-dev"

Add some basic tests for pat_index and vm_bind.

v2: Make sure to actually use srand() with the chosen seed.
  - Make it work on xe2; the wt mode now has compression.
  - Also test some xe2+ specific pat_index modes.
v3: Fix decompress step.
v4: (Niranjana)
  - Various improvements, including testing more pat_index modes, like
    wc where possible.
  - Document the idea behind "common" modes.
v5: (Niranjana)
  - Clarify the size_modes. Also rather just use 2M instead of 4M, which
    better matches the "single-pde" description.
  - Also test some mtl modes.
  - Test the max pat_index on xe2, since it uses pat[0]-pat[4].

Signed-off-by: Matthew Auld
Cc: Niranjana Vishwanathapura
Cc: José Roberto de Souza
Cc: Pallavi Mishra
Cc: Nitish Kumar
---
 tests/intel/xe_pat.c | 774 +++++++++++++++++++++++++++++++++++++++++++
 tests/meson.build    |   1 +
 2 files changed, 775 insertions(+)
 create mode 100644 tests/intel/xe_pat.c

diff --git a/tests/intel/xe_pat.c b/tests/intel/xe_pat.c
new file mode 100644
index 000000000..f8c79a2b9
--- /dev/null
+++ b/tests/intel/xe_pat.c
@@ -0,0 +1,774 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+/**
+ * TEST: Test for selecting per-VMA pat_index
+ * Category: Software building block
+ * Sub-category: VMA
+ * Functionality: pat_index
+ */
+
+#include "igt.h"
+#include "intel_blt.h"
+#include "intel_mocs.h"
+#include "intel_pat.h"
+
+#include "xe/xe_ioctl.h"
+#include "xe/xe_query.h"
+#include "xe/xe_util.h"
+
+#define PAGE_SIZE 4096
+
+static bool do_slow_check;
+
+/**
+ * SUBTEST: userptr-coh-none
+ * Test category: functionality test
+ * Description: Test non-coherent pat_index on userptr
+ */
+static void userptr_coh_none(int fd)
+{
+	size_t size = xe_get_default_alignment(fd);
+	uint32_t vm;
+	void *data;
+
+	data = mmap(0, size, PROT_READ | PROT_WRITE,
+		    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	igt_assert(data != MAP_FAILED);
+
+	vm = xe_vm_create(fd, 0, 0);
+
+	/*
+	 * Try some valid combinations first just to make sure we're not being
+	 * swindled.
+	 */
+	igt_assert_eq(__xe_vm_bind(fd, vm, 0, 0, to_user_pointer(data), 0x40000,
+				   size, XE_VM_BIND_OP_MAP_USERPTR, 0, NULL, 0, 0,
+				   DEFAULT_PAT_INDEX, 0),
+		      0);
+	xe_vm_unbind_sync(fd, vm, 0, 0x40000, size);
+	igt_assert_eq(__xe_vm_bind(fd, vm, 0, 0, to_user_pointer(data), 0x40000,
+				   size, XE_VM_BIND_OP_MAP_USERPTR, 0, NULL, 0, 0,
+				   intel_get_pat_idx_wb(fd), 0),
+		      0);
+	xe_vm_unbind_sync(fd, vm, 0, 0x40000, size);
+
+	/* And then some known COH_NONE pat_index combos which should fail. */
+	igt_assert_eq(__xe_vm_bind(fd, vm, 0, 0, to_user_pointer(data), 0x40000,
+				   size, XE_VM_BIND_OP_MAP_USERPTR, 0, NULL, 0, 0,
+				   intel_get_pat_idx_uc(fd), 0),
+		      -EINVAL);
+	igt_assert_eq(__xe_vm_bind(fd, vm, 0, 0, to_user_pointer(data), 0x40000,
+				   size, XE_VM_BIND_OP_MAP_USERPTR, 0, NULL, 0, 0,
+				   intel_get_pat_idx_wt(fd), 0),
+		      -EINVAL);
+
+	munmap(data, size);
+	xe_vm_destroy(fd, vm);
+}
+
+/**
+ * SUBTEST: pat-index-all
+ * Test category: functionality test
+ * Description: Test every pat_index
+ */
+static void pat_index_all(int fd)
+{
+	uint16_t dev_id = intel_get_drm_devid(fd);
+	size_t size = xe_get_default_alignment(fd);
+	uint32_t vm, bo;
+	uint8_t pat_index;
+
+	vm = xe_vm_create(fd, 0, 0);
+
+	bo = xe_bo_create_caching(fd, 0, size, all_memory_regions(fd),
+				  DRM_XE_GEM_CPU_CACHING_WC,
+				  DRM_XE_GEM_COH_NONE);
+
+	igt_assert_eq(__xe_vm_bind(fd, vm, 0, bo, 0, 0x40000,
+				   size, XE_VM_BIND_OP_MAP, 0, NULL, 0, 0,
+				   intel_get_pat_idx_uc(fd), 0),
+		      0);
+	xe_vm_unbind_sync(fd, vm, 0, 0x40000, size);
+
+	igt_assert_eq(__xe_vm_bind(fd, vm, 0, bo, 0, 0x40000,
+				   size, XE_VM_BIND_OP_MAP, 0, NULL, 0, 0,
+				   intel_get_pat_idx_wt(fd), 0),
+		      0);
+	xe_vm_unbind_sync(fd, vm, 0, 0x40000, size);
+
+	igt_assert_eq(__xe_vm_bind(fd, vm, 0, bo, 0, 0x40000,
+				   size, XE_VM_BIND_OP_MAP, 0, NULL, 0, 0,
+				   intel_get_pat_idx_wb(fd), 0),
+		      0);
+	xe_vm_unbind_sync(fd, vm, 0, 0x40000, size);
+
+	igt_assert(intel_get_max_pat_index(fd));
+
+	for (pat_index = 0; pat_index <= intel_get_max_pat_index(fd);
+	     pat_index++) {
+		if (intel_get_device_info(dev_id)->graphics_ver == 20 &&
+		    pat_index >= 16 && pat_index <= 19) { /* hw reserved */
+			igt_assert_eq(__xe_vm_bind(fd, vm, 0, bo, 0, 0x40000,
+						   size, XE_VM_BIND_OP_MAP, 0, NULL, 0, 0,
+						   pat_index, 0),
+				      -EINVAL);
+		} else {
+			igt_assert_eq(__xe_vm_bind(fd, vm, 0, bo, 0, 0x40000,
+						   size, XE_VM_BIND_OP_MAP, 0, NULL, 0, 0,
+						   pat_index, 0),
+				      0);
+			xe_vm_unbind_sync(fd, vm, 0, 0x40000, size);
+		}
+	}
+
+	/* pat_index is now one beyond the max, which should be rejected */
+	igt_assert_eq(__xe_vm_bind(fd, vm, 0, bo, 0, 0x40000,
+				   size, XE_VM_BIND_OP_MAP, 0, NULL, 0, 0,
+				   pat_index, 0),
+		      -EINVAL);
+
+	gem_close(fd, bo);
+
+	/* Must be at least as coherent as the gem_create coh_mode. */
+	bo = xe_bo_create_caching(fd, 0, size, system_memory(fd),
+				  DRM_XE_GEM_CPU_CACHING_WB,
+				  DRM_XE_GEM_COH_AT_LEAST_1WAY);
+
+	igt_assert_eq(__xe_vm_bind(fd, vm, 0, bo, 0, 0x40000,
+				   size, XE_VM_BIND_OP_MAP, 0, NULL, 0, 0,
+				   intel_get_pat_idx_uc(fd), 0),
+		      -EINVAL);
+
+	igt_assert_eq(__xe_vm_bind(fd, vm, 0, bo, 0, 0x40000,
+				   size, XE_VM_BIND_OP_MAP, 0, NULL, 0, 0,
+				   intel_get_pat_idx_wt(fd), 0),
+		      -EINVAL);
+
+	igt_assert_eq(__xe_vm_bind(fd, vm, 0, bo, 0, 0x40000,
+				   size, XE_VM_BIND_OP_MAP, 0, NULL, 0, 0,
+				   intel_get_pat_idx_wb(fd), 0),
+		      0);
+	xe_vm_unbind_sync(fd, vm, 0, 0x40000, size);
+
+	gem_close(fd, bo);
+
+	xe_vm_destroy(fd, vm);
+}
+
+#define CLEAR_1 0xFFFFFFFF /* something compressible */
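+
+/*
+ * Note: the alias handle passed to the helper below is created in
+ * pat_index_blt() via gem_flink()/gem_open(), i.e. a second handle backed by
+ * the same physical pages as the dst object, which can then be bound with an
+ * uncompressed pat_index.
+ */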
+
+static void xe2_blt_decompress_dst(int fd,
+				   intel_ctx_t *ctx,
+				   uint64_t ahnd,
+				   struct blt_copy_data *blt,
+				   uint32_t alias_handle,
+				   uint32_t size)
+{
+	struct blt_copy_object tmp = {};
+
+	/*
+	 * Xe2 in-place decompression using an alias to the same physical
+	 * memory, but with the dst mapped using some uncompressed pat_index.
+	 * This should allow checking the object pages via mmap.
+	 */
+
+	memcpy(&tmp, &blt->src, sizeof(blt->dst));
+	memcpy(&blt->src, &blt->dst, sizeof(blt->dst));
+	blt_set_object(&blt->dst, alias_handle, size, 0,
+		       intel_get_uc_mocs_index(fd),
+		       intel_get_pat_idx_uc(fd), /* compression disabled */
+		       T_LINEAR, 0, 0);
+	blt_fast_copy(fd, ctx, NULL, ahnd, blt);
+	memcpy(&blt->dst, &blt->src, sizeof(blt->dst));
+	memcpy(&blt->src, &tmp, sizeof(blt->dst));
+}
+
+struct xe_pat_size_mode {
+	uint16_t width;
+	uint16_t height;
+	uint32_t alignment;
+	const char *name;
+};
+
+struct xe_pat_param {
+	int fd;
+
+	const struct xe_pat_size_mode *size;
+
+	uint32_t r1;
+	uint8_t r1_pat_index;
+	uint16_t r1_coh_mode;
+	bool r1_force_cpu_wc;
+
+	uint32_t r2;
+	uint8_t r2_pat_index;
+	uint16_t r2_coh_mode;
+	bool r2_force_cpu_wc;
+	bool r2_compressed; /* xe2+ compression */
+};
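+
+/*
+ * Example wiring of the above (hypothetical values; vram_memory() requires a
+ * device with VRAM, and size_modes[] is defined further down): copy from a
+ * cacheable system memory mapping into an uncached, uncompressed VRAM
+ * mapping:
+ *
+ *	struct xe_pat_param p = {
+ *		.fd = fd,
+ *		.size = &size_modes[0],
+ *		.r1 = system_memory(fd),
+ *		.r1_pat_index = intel_get_pat_idx_wb(fd),
+ *		.r1_coh_mode = DRM_XE_GEM_COH_AT_LEAST_1WAY,
+ *		.r2 = vram_memory(fd, 0),
+ *		.r2_pat_index = intel_get_pat_idx_uc(fd),
+ *		.r2_coh_mode = DRM_XE_GEM_COH_NONE,
+ *	};
+ *	pat_index_blt(&p);
+ */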
+
+static void pat_index_blt(struct xe_pat_param *p)
+{
+	struct drm_xe_engine_class_instance inst = {
+		.engine_class = DRM_XE_ENGINE_CLASS_COPY,
+	};
+	struct blt_copy_data blt = {};
+	struct blt_copy_object src = {};
+	struct blt_copy_object dst = {};
+	uint32_t vm, exec_queue, src_bo, dst_bo, bb;
+	uint32_t *src_map, *dst_map;
+	uint16_t r1_cpu_caching, r2_cpu_caching;
+	uint32_t r1_flags, r2_flags;
+	intel_ctx_t *ctx;
+	uint64_t ahnd;
+	int width = p->size->width, height = p->size->height;
+	int size, stride, bb_size;
+	int bpp = 32;
+	uint32_t alias, name;
+	int fd = p->fd;
+	int i;
+
+	igt_require(blt_has_fast_copy(fd));
+
+	vm = xe_vm_create(fd, DRM_XE_VM_CREATE_ASYNC_DEFAULT, 0);
+	exec_queue = xe_exec_queue_create(fd, vm, &inst, 0);
+	ctx = intel_ctx_xe(fd, vm, exec_queue, 0, 0, 0);
+	ahnd = intel_allocator_open_full(fd, ctx->vm, 0, 0,
+					 INTEL_ALLOCATOR_SIMPLE,
+					 ALLOC_STRATEGY_LOW_TO_HIGH,
+					 p->size->alignment);
+
+	bb_size = xe_get_default_alignment(fd);
+	bb = xe_bo_create_flags(fd, 0, bb_size, system_memory(fd));
+
+	size = width * height * bpp / 8;
+	stride = width * 4;
+
+	r1_flags = 0;
+	if (p->r1 != system_memory(fd))
+		r1_flags |= XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM;
+
+	if (p->r1_coh_mode == DRM_XE_GEM_COH_AT_LEAST_1WAY &&
+	    p->r1 == system_memory(fd) && !p->r1_force_cpu_wc)
+		r1_cpu_caching = DRM_XE_GEM_CPU_CACHING_WB;
+	else
+		r1_cpu_caching = DRM_XE_GEM_CPU_CACHING_WC;
+
+	r2_flags = 0;
+	if (p->r2 != system_memory(fd))
+		r2_flags |= XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM;
+
+	if (p->r2_coh_mode == DRM_XE_GEM_COH_AT_LEAST_1WAY &&
+	    p->r2 == system_memory(fd) && !p->r2_force_cpu_wc)
+		r2_cpu_caching = DRM_XE_GEM_CPU_CACHING_WB;
+	else
+		r2_cpu_caching = DRM_XE_GEM_CPU_CACHING_WC;
+
+	src_bo = xe_bo_create_caching(fd, 0, size, p->r1 | r1_flags, r1_cpu_caching,
+				      p->r1_coh_mode);
+	dst_bo = xe_bo_create_caching(fd, 0, size, p->r2 | r2_flags, r2_cpu_caching,
+				      p->r2_coh_mode);
+	if (p->r2_compressed) {
+		name = gem_flink(fd, dst_bo);
+		alias = gem_open(fd, name);
+	}
+
+	blt_copy_init(fd, &blt);
+	blt.color_depth = CD_32bit;
+
+	blt_set_object(&src, src_bo, size, p->r1, intel_get_uc_mocs_index(fd),
+		       p->r1_pat_index, T_LINEAR,
+		       COMPRESSION_DISABLED, COMPRESSION_TYPE_3D);
+	blt_set_geom(&src, stride, 0, 0, width, height, 0, 0);
+
+	blt_set_object(&dst, dst_bo, size, p->r2, intel_get_uc_mocs_index(fd),
+		       p->r2_pat_index, T_LINEAR,
+		       COMPRESSION_DISABLED, COMPRESSION_TYPE_3D);
+	blt_set_geom(&dst, stride, 0, 0, width, height, 0, 0);
+
+	blt_set_copy_object(&blt.src, &src);
+	blt_set_copy_object(&blt.dst, &dst);
+	blt_set_batch(&blt.bb, bb, bb_size, system_memory(fd));
+
+	src_map = xe_bo_map(fd, src_bo, size);
+	dst_map = xe_bo_map(fd, dst_bo, size);
+
+	/* Ensure we always see zeroes for the initial KMD zeroing */
+	blt_fast_copy(fd, ctx, NULL, ahnd, &blt);
+	if (p->r2_compressed)
+		xe2_blt_decompress_dst(fd, ctx, ahnd, &blt, alias, size);
+
+	/*
+	 * Only sample a random dword in every page if we are doing slow
+	 * uncached reads from VRAM.
+	 */
+	if (!do_slow_check && p->r2 != system_memory(fd)) {
+		int dwords_page = PAGE_SIZE / sizeof(uint32_t);
+		int dword = rand() % dwords_page;
+
+		igt_debug("random dword: %d\n", dword);
+
+		for (i = dword; i < size / sizeof(uint32_t); i += dwords_page)
+			igt_assert_eq(dst_map[i], 0);
+	} else {
+		for (i = 0; i < size / sizeof(uint32_t); i++)
+			igt_assert_eq(dst_map[i], 0);
+	}
+
+	/* Write some values from the CPU, potentially dirtying the CPU cache */
+	for (i = 0; i < size / sizeof(uint32_t); i++) {
+		if (p->r2_compressed)
+			src_map[i] = CLEAR_1;
+		else
+			src_map[i] = i;
+	}
+
+	/* And finally ensure we always see the CPU written values */
+	blt_fast_copy(fd, ctx, NULL, ahnd, &blt);
+	if (p->r2_compressed)
+		xe2_blt_decompress_dst(fd, ctx, ahnd, &blt, alias, size);
+
+	if (!do_slow_check && p->r2 != system_memory(fd)) {
+		int dwords_page = PAGE_SIZE / sizeof(uint32_t);
+		int dword = rand() % dwords_page;
+
+		igt_debug("random dword: %d\n", dword);
+
+		for (i = dword; i < size / sizeof(uint32_t); i += dwords_page) {
+			if (p->r2_compressed)
+				igt_assert_eq(dst_map[i], CLEAR_1);
+			else
+				igt_assert_eq(dst_map[i], i);
+		}
+	} else {
+		for (i = 0; i < size / sizeof(uint32_t); i++) {
+			if (p->r2_compressed)
+				igt_assert_eq(dst_map[i], CLEAR_1);
+			else
+				igt_assert_eq(dst_map[i], i);
+		}
+	}
+
+	munmap(src_map, size);
+	munmap(dst_map, size);
+
+	gem_close(fd, src_bo);
+	gem_close(fd, dst_bo);
+	gem_close(fd, bb);
+
+	xe_exec_queue_destroy(fd, exec_queue);
+	xe_vm_destroy(fd, vm);
+
+	put_ahnd(ahnd);
+	intel_ctx_destroy(fd, ctx);
+}
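+
+/*
+ * Note: pat_index_render() below mirrors the blt path: both first verify that
+ * the KMD clear-on-create is visible through the chosen dst mapping, then
+ * write through the (potentially cacheable) CPU mapping of src and verify a
+ * GPU copy observes those writes, which is what actually exercises the
+ * coherency mode under test.
+ */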
+
+static void pat_index_render(struct xe_pat_param *p)
+{
+	int fd = p->fd;
+	uint32_t devid = intel_get_drm_devid(fd);
+	igt_render_copyfunc_t render_copy = NULL;
+	int size, stride, width = p->size->width, height = p->size->height;
+	struct intel_buf src, dst;
+	struct intel_bb *ibb;
+	struct buf_ops *bops;
+	uint16_t r1_cpu_caching, r2_cpu_caching;
+	uint32_t r1_flags, r2_flags;
+	uint32_t src_bo, dst_bo;
+	uint32_t *src_map, *dst_map;
+	int bpp = 32;
+	int i;
+
+	bops = buf_ops_create(fd);
+
+	render_copy = igt_get_render_copyfunc(devid);
+	igt_require(render_copy);
+	igt_require(!p->r2_compressed); /* XXX */
+	igt_require(xe_has_engine_class(fd, DRM_XE_ENGINE_CLASS_RENDER));
+
+	ibb = intel_bb_create_full(fd, 0, 0, NULL, xe_get_default_alignment(fd),
+				   0, 0, p->size->alignment,
+				   INTEL_ALLOCATOR_SIMPLE,
+				   ALLOC_STRATEGY_HIGH_TO_LOW);
+
+	if (p->r1_coh_mode == DRM_XE_GEM_COH_AT_LEAST_1WAY &&
+	    p->r1 == system_memory(fd) && !p->r1_force_cpu_wc)
+		r1_cpu_caching = DRM_XE_GEM_CPU_CACHING_WB;
+	else
+		r1_cpu_caching = DRM_XE_GEM_CPU_CACHING_WC;
+
+	if (p->r2_coh_mode == DRM_XE_GEM_COH_AT_LEAST_1WAY &&
+	    p->r2 == system_memory(fd) && !p->r2_force_cpu_wc)
+		r2_cpu_caching = DRM_XE_GEM_CPU_CACHING_WB;
+	else
+		r2_cpu_caching = DRM_XE_GEM_CPU_CACHING_WC;
+
+	size = width * height * bpp / 8;
+	stride = width * 4;
+
+	r1_flags = 0;
+	if (p->r1 != system_memory(fd))
+		r1_flags |= XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM;
+
+	src_bo = xe_bo_create_caching(fd, 0, size, p->r1 | r1_flags, r1_cpu_caching,
+				      p->r1_coh_mode);
+	intel_buf_init_full(bops, src_bo, &src, width, height, bpp, 0,
+			    I915_TILING_NONE, I915_COMPRESSION_NONE, size,
+			    stride, p->r1, p->r1_pat_index);
+
+	r2_flags = 0;
+	if (p->r2 != system_memory(fd))
+		r2_flags |= XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM;
+
+	dst_bo = xe_bo_create_caching(fd, 0, size, p->r2 | r2_flags, r2_cpu_caching,
+				      p->r2_coh_mode);
+	intel_buf_init_full(bops, dst_bo, &dst, width, height, bpp, 0,
+			    I915_TILING_NONE, I915_COMPRESSION_NONE, size,
+			    stride, p->r2, p->r2_pat_index);
+
+	src_map = xe_bo_map(fd, src_bo, size);
+	dst_map = xe_bo_map(fd, dst_bo, size);
+
+	/* Ensure we always see zeroes for the initial KMD zeroing */
+	render_copy(ibb,
+		    &src,
+		    0, 0, width, height,
+		    &dst,
+		    0, 0);
+	intel_bb_sync(ibb);
+
+	if (!do_slow_check && p->r2 != system_memory(fd)) {
+		int dwords_page = PAGE_SIZE / sizeof(uint32_t);
+		int dword = rand() % dwords_page;
+
+		igt_debug("random dword: %d\n", dword);
+
+		for (i = dword; i < size / sizeof(uint32_t); i += dwords_page)
+			igt_assert_eq(dst_map[i], 0);
+	} else {
+		for (i = 0; i < size / sizeof(uint32_t); i++)
+			igt_assert_eq(dst_map[i], 0);
+	}
+
+	/* Write some values from the CPU, potentially dirtying the CPU cache */
+	for (i = 0; i < size / sizeof(uint32_t); i++)
+		src_map[i] = i;
+
+	/* And finally ensure we always see the CPU written values */
+	render_copy(ibb,
+		    &src,
+		    0, 0, width, height,
+		    &dst,
+		    0, 0);
+	intel_bb_sync(ibb);
+
+	if (!do_slow_check && p->r2 != system_memory(fd)) {
+		int dwords_page = PAGE_SIZE / sizeof(uint32_t);
+		int dword = rand() % dwords_page;
+
+		igt_debug("random dword: %d\n", dword);
+
+		for (i = dword; i < size / sizeof(uint32_t); i += dwords_page)
+			igt_assert_eq(dst_map[i], i);
+	} else {
+		for (i = 0; i < size / sizeof(uint32_t); i++)
+			igt_assert_eq(dst_map[i], i);
+	}
+
+	munmap(src_map, size);
+	munmap(dst_map, size);
+
+	intel_bb_destroy(ibb);
+
+	gem_close(fd, src_bo);
+	gem_close(fd, dst_bo);
+}
+
+static uint8_t get_pat_idx_uc(int fd, bool *compressed)
+{
+	if (compressed)
+		*compressed = false;
+
+	return intel_get_pat_idx_uc(fd);
+}
+
+static uint8_t get_pat_idx_wt(int fd, bool *compressed)
+{
+	uint16_t dev_id = intel_get_drm_devid(fd);
+
+	if (compressed)
+		*compressed = intel_get_device_info(dev_id)->graphics_ver == 20;
+
+	return intel_get_pat_idx_wt(fd);
+}
+
+static uint8_t get_pat_idx_wb(int fd, bool *compressed)
+{
+	if (compressed)
+		*compressed = false;
+
+	return intel_get_pat_idx_wb(fd);
+}
+
+struct pat_index_entry {
+	uint8_t (*get_pat_index)(int fd, bool *compressed);
+
+	uint8_t pat_index;
+	bool compressed;
+
+	const char *name;
+	uint16_t coh_mode;
+	bool force_cpu_wc;
+};
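+
+/*
+ * An entry either resolves its pat_index at runtime through get_pat_index()
+ * (which can also report whether the mode implies xe2 compression, as with
+ * wt above), or hardcodes a platform-specific index with a fixed compressed
+ * flag.
+ */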
+
+/*
+ * The common modes are available on all platforms supported by Xe and so
+ * should be commonly supported. There are many more possible pat_index modes,
+ * however most IGTs shouldn't really care about them, so likely no need to
+ * add them to lib/intel_pat.c. We do try to test some of the non-common modes
+ * here.
+ */
+const struct pat_index_entry common_pat_index_modes[] = {
+	{ get_pat_idx_uc, 0, 0, "uc", DRM_XE_GEM_COH_NONE },
+	{ get_pat_idx_wt, 0, 0, "wt", DRM_XE_GEM_COH_NONE },
+	{ get_pat_idx_wb, 0, 0, "wb", DRM_XE_GEM_COH_AT_LEAST_1WAY },
+	{ get_pat_idx_wb, 0, 0, "wb-cpu-wc", DRM_XE_GEM_COH_AT_LEAST_1WAY, true },
+};
+
+const struct pat_index_entry xelp_pat_index_modes[] = {
+	{ NULL, 1, false, "wc", DRM_XE_GEM_COH_NONE },
+};
+
+const struct pat_index_entry xehpc_pat_index_modes[] = {
+	{ NULL, 1, false, "wc", DRM_XE_GEM_COH_NONE },
+	{ NULL, 4, false, "c1-wt", DRM_XE_GEM_COH_NONE },
+	{ NULL, 5, false, "c1-wb", DRM_XE_GEM_COH_AT_LEAST_1WAY },
+	{ NULL, 6, false, "c2-wt", DRM_XE_GEM_COH_NONE },
+	{ NULL, 7, false, "c2-wb", DRM_XE_GEM_COH_AT_LEAST_1WAY },
+};
+
+const struct pat_index_entry xelpg_pat_index_modes[] = {
+	{ NULL, 0, false, "wb-none", DRM_XE_GEM_COH_NONE },
+	{ NULL, 3, false, "1way", DRM_XE_GEM_COH_AT_LEAST_1WAY },
+	{ NULL, 4, false, "2way-atomics", DRM_XE_GEM_COH_AT_LEAST_1WAY },
+	{ NULL, 4, false, "2way-atomics-cpu-wc", DRM_XE_GEM_COH_AT_LEAST_1WAY, true },
+};
+
+/* Too many, just pick some of the interesting ones */
+const struct pat_index_entry xe2_pat_index_modes[] = {
+	{ NULL, 1, false, "1way", DRM_XE_GEM_COH_AT_LEAST_1WAY },
+	{ NULL, 2, false, "2way", DRM_XE_GEM_COH_AT_LEAST_1WAY },
+	{ NULL, 2, false, "2way-cpu-wc", DRM_XE_GEM_COH_AT_LEAST_1WAY, true },
+	{ NULL, 3, true, "uc-comp", DRM_XE_GEM_COH_NONE },
+	{ NULL, 5, false, "uc-1way", DRM_XE_GEM_COH_AT_LEAST_1WAY },
+	{ NULL, 31, false, "c3-2way", DRM_XE_GEM_COH_AT_LEAST_1WAY },
+};
+
+/*
+ * Depending on 2M/1G GTT pages we might trigger different PTE layouts for the
+ * PAT bits, so make sure we test with and without huge-pages. Also ensure we
+ * have a mix of different pat_index modes for each PDE.
+ */
+const struct xe_pat_size_mode size_modes[] = {
+	{ 256, 256, 0, "mixed-pde" }, /* 256K */
+	{ 1024, 512, 1u << 21, "single-pde" }, /* 2M and hopefully 2M GTT page */
+};
+
+typedef void (*copy_fn)(struct xe_pat_param *p);
+
+const struct xe_pat_copy_mode {
+	copy_fn fn;
+	const char *name;
+} copy_modes[] = {
+	{ pat_index_blt, "blt" },
+	{ pat_index_render, "render" },
+};
+
+/**
+ * SUBTEST: pat-index-common
+ * Test category: functionality test
+ * Description: Check the common pat_index modes.
+ */
+
+/**
+ * SUBTEST: pat-index-xelp
+ * Test category: functionality test
+ * Description: Check some of the xelp pat_index modes.
+ */
+
+/**
+ * SUBTEST: pat-index-xehpc
+ * Test category: functionality test
+ * Description: Check some of the xehpc pat_index modes.
+ */
+
+/**
+ * SUBTEST: pat-index-xelpg
+ * Test category: functionality test
+ * Description: Check some of the xelpg pat_index modes.
+ */
+
+/**
+ * SUBTEST: pat-index-xe2
+ * Test category: functionality test
+ * Description: Check some of the xe2 pat_index modes.
+ */
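+
+/*
+ * Each dynamic subtest below is one point in the cross product of: a copy
+ * engine (blt or render), an ordered src/dst pair of memory regions, an
+ * ordered pair of pat_index modes and a size mode.
+ */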
+
+static void subtest_pat_index_modes_with_regions(int fd,
+						 const struct pat_index_entry *modes_arr,
+						 int n_modes)
+{
+	struct igt_collection *copy_set;
+	struct igt_collection *pat_index_set;
+	struct igt_collection *regions_set;
+	struct igt_collection *sizes_set;
+	struct igt_collection *copies;
+	struct xe_pat_param p = {};
+
+	p.fd = fd;
+
+	copy_set = igt_collection_create(ARRAY_SIZE(copy_modes));
+
+	pat_index_set = igt_collection_create(n_modes);
+
+	regions_set = xe_get_memory_region_set(fd,
+					       XE_MEM_REGION_CLASS_SYSMEM,
+					       XE_MEM_REGION_CLASS_VRAM);
+
+	sizes_set = igt_collection_create(ARRAY_SIZE(size_modes));
+
+	for_each_variation_r(copies, 1, copy_set) {
+		struct igt_collection *regions;
+		struct xe_pat_copy_mode copy_mode;
+
+		copy_mode = copy_modes[igt_collection_get_value(copies, 0)];
+
+		for_each_variation_r(regions, 2, regions_set) {
+			struct igt_collection *pat_modes;
+			uint32_t r1, r2;
+			char *reg_str;
+
+			r1 = igt_collection_get_value(regions, 0);
+			r2 = igt_collection_get_value(regions, 1);
+
+			reg_str = xe_memregion_dynamic_subtest_name(fd, regions);
+
+			for_each_variation_r(pat_modes, 2, pat_index_set) {
+				struct igt_collection *sizes;
+				struct pat_index_entry r1_entry, r2_entry;
+				int r1_idx, r2_idx;
+
+				r1_idx = igt_collection_get_value(pat_modes, 0);
+				r2_idx = igt_collection_get_value(pat_modes, 1);
+
+				r1_entry = modes_arr[r1_idx];
+				r2_entry = modes_arr[r2_idx];
+
+				if (r1_entry.get_pat_index)
+					p.r1_pat_index = r1_entry.get_pat_index(fd, NULL);
+				else
+					p.r1_pat_index = r1_entry.pat_index;
+
+				if (r2_entry.get_pat_index) {
+					p.r2_pat_index = r2_entry.get_pat_index(fd, &p.r2_compressed);
+				} else {
+					p.r2_pat_index = r2_entry.pat_index;
+					p.r2_compressed = r2_entry.compressed;
+				}
+
+				p.r1_coh_mode = r1_entry.coh_mode;
+				p.r2_coh_mode = r2_entry.coh_mode;
+
+				p.r1_force_cpu_wc = r1_entry.force_cpu_wc;
+				p.r2_force_cpu_wc = r2_entry.force_cpu_wc;
+
+				p.r1 = r1;
+				p.r2 = r2;
+
+				for_each_variation_r(sizes, 1, sizes_set) {
+					int size_mode_idx = igt_collection_get_value(sizes, 0);
+
+					p.size = &size_modes[size_mode_idx];
+
+					igt_debug("[r1]: r: %u, idx: %u, coh: %u, wc: %d\n",
+						  p.r1, p.r1_pat_index, p.r1_coh_mode, p.r1_force_cpu_wc);
+					igt_debug("[r2]: r: %u, idx: %u, coh: %u, wc: %d, comp: %d, w: %u, h: %u, a: %u\n",
+						  p.r2, p.r2_pat_index, p.r2_coh_mode,
+						  p.r2_force_cpu_wc, p.r2_compressed,
+						  p.size->width, p.size->height,
+						  p.size->alignment);
+
+					igt_dynamic_f("%s-%s-%s-%s-%s",
+						      copy_mode.name,
+						      reg_str, r1_entry.name,
+						      r2_entry.name, p.size->name)
+						copy_mode.fn(&p);
+				}
+			}
+
+			free(reg_str);
+		}
+	}
+}
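+
+/*
+ * The rand() based page sampling above is seeded from time(NULL) in the
+ * fixture below; the seed is logged via igt_debug() so a failing run can be
+ * reproduced by feeding the same value back to srand().
+ */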
igt_subtest_with_dynamic("pat-index-xelpg") { + igt_require(IS_METEORLAKE(dev_id)); + subtest_pat_index_modes_with_regions(fd, xelpg_pat_index_modes, + ARRAY_SIZE(xelpg_pat_index_modes)); + } + + igt_subtest_with_dynamic("pat-index-xe2") { + igt_require(intel_get_device_info(dev_id)->graphics_ver >= 20); + subtest_pat_index_modes_with_regions(fd, xe2_pat_index_modes, + ARRAY_SIZE(xe2_pat_index_modes)); + } + + igt_fixture + drm_close_driver(fd); +} diff --git a/tests/meson.build b/tests/meson.build index 5afcd8cbb..3aecfbee0 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -297,6 +297,7 @@ intel_xe_progs = [ 'xe_mmap', 'xe_module_load', 'xe_noexec_ping_pong', + 'xe_pat', 'xe_pm', 'xe_pm_residency', 'xe_prime_self_import', -- 2.41.0