From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mgamail.intel.com (mgamail.intel.com [192.198.163.11]) by gabe.freedesktop.org (Postfix) with ESMTPS id BF68510E755 for ; Thu, 30 Nov 2023 21:40:38 +0000 (UTC) From: David Kershner To: igt-dev@lists.freedesktop.org, michael.j.ruhl@intel.com, john.fleck@intel.com, katarzyna.piecielska@intel.com Date: Thu, 30 Nov 2023 16:40:32 -0500 Message-Id: <20231130214032.4152491-1-david.kershner@intel.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Subject: [igt-dev] [PATCH i-g-t v4] tests/intel: Add Xe peer2peer test List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: igt-dev-bounces@lists.freedesktop.org Sender: "igt-dev" List-ID: Add tests to read/write data between two different GPUs using DMABUF and P2PDMA. The kernel must have P2PDMA and DMABUF enabled for the test to pass. Acked-by: Michael J. Ruhl Signed-off-by: David Kershner --- Changes from v1 -- - fixed whitespace issues - fixed review comments Changes from v2 -- - fixed peer2peer test description - updates due to rebase Changes from v3 -- - updates due to rebase --- tests/intel/xe_peer2peer.c | 374 +++++++++++++++++++++++++++++++++++++ tests/meson.build | 1 + 2 files changed, 375 insertions(+) create mode 100644 tests/intel/xe_peer2peer.c diff --git a/tests/intel/xe_peer2peer.c b/tests/intel/xe_peer2peer.c new file mode 100644 index 000000000..e81c5bd7b --- /dev/null +++ b/tests/intel/xe_peer2peer.c @@ -0,0 +1,374 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "drm.h" +#include "igt.h" +#include "igt_device.h" +#include "intel_blt.h" +#include "intel_mocs.h" +#include "lib/igt_sysfs.h" +#include "lib/intel_chipset.h" +#include "lib/intel_pat.h" +#include "xe/xe_ioctl.h" +#include "xe/xe_query.h" +#include "xe/xe_util.h" + +/** + * TEST: xe_peer2peer + * Category: Hardware building block + * Sub-category: MultiGPU + * 
Functionality: dma buf copy
+ * Description: Peer2peer dma buf copy tests
+ * Test category: xe
+ *
+ * SUBTEST: read
+ * Description:
+ *   dma buf copy read
+ *
+ * SUBTEST: write
+ * Description:
+ *   dma buf copy write
+ */
+
+IGT_TEST_DESCRIPTION("Exercise blitter read/writes between two Xe devices");
+
+struct blt_fast_copy_data {
+	int xe;
+	struct blt_copy_object src;
+	struct blt_copy_object mid;
+	struct blt_copy_object dst;
+
+	struct blt_copy_batch bb;
+	enum blt_color_depth color_depth;
+};
+
+struct gpu_info {
+	uint32_t id;
+	int fd;
+	struct igt_collection *set;
+};
+
+static bool has_prime(int fd)
+{
+	uint64_t value;
+	uint64_t mask = DRM_PRIME_CAP_IMPORT | DRM_PRIME_CAP_EXPORT;
+
+	if (drmGetCap(fd, DRM_CAP_PRIME, &value))
+		return false;
+
+	return (value & mask) == mask;
+}
+
+static int get_device_info(struct gpu_info gpus[], int num_gpus)
+{
+	int cnt;
+	int xe;
+	int i;
+
+	for (i = 0, cnt = 0; i < 128 && cnt < num_gpus; i++) {
+		xe = __drm_open_driver_another(i, DRIVER_XE);
+		if (xe < 0)
+			break;
+
+		/* skip devices lacking PRIME import/export or blitter fast-copy */
+		if (!has_prime(xe) || !blt_has_fast_copy(xe)) {
+			close(xe);
+			continue;
+		}
+
+		gpus[cnt].fd = xe;
+		gpus[cnt].set = xe_get_memory_region_set(xe,
+							 DRM_XE_MEM_REGION_CLASS_SYSMEM,
+							 DRM_XE_MEM_REGION_CLASS_VRAM);
+		cnt++;
+	}
+
+	return cnt;
+}
+
+/**
+ * test_read - Read an imported buffer from an external GPU via dma-buf
+ * @ex_gpu: device providing the original object
+ * @im_gpu: device doing the read
+ * @ex_reg: the source region to copy from
+ * @im_reg: the destination region to copy to
+ *
+ */
+static void test_read(struct gpu_info *ex_gpu, struct gpu_info *im_gpu,
+		      uint32_t ex_reg, uint32_t im_reg)
+{
+	struct blt_copy_data im_blt = {};
+	struct blt_copy_data ex_blt = {};
+	struct blt_copy_object *dst;
+	struct blt_copy_object *im_src;
+	struct blt_copy_object *src;
+	const uint32_t bpp = 32;
+	uint64_t im_bb_size = xe_get_default_alignment(im_gpu->fd);
+	uint64_t ahnd;
+	uint32_t bb;
+	uint32_t width = 1024, height = 1024;
+	int result;
+	uint32_t vm, exec_queue;
+	uint32_t ex_xe = ex_gpu->fd;
+	uint32_t im_xe = im_gpu->fd;
+	uint32_t ex_src, dmabuf;
+	uint32_t stride;
+
+	struct drm_xe_engine_class_instance inst = {
+		.engine_class = DRM_XE_ENGINE_CLASS_COPY,
+	};
+	intel_ctx_t *ctx;
+
+	vm = xe_vm_create(im_xe, DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT, 0);
+	exec_queue = xe_exec_queue_create(im_xe, vm, &inst, 0);
+	ctx = intel_ctx_xe(im_xe, vm, exec_queue, 0, 0, 0);
+	ahnd = intel_allocator_open_full(im_xe, ctx->vm, 0, 0,
+					 INTEL_ALLOCATOR_SIMPLE,
+					 ALLOC_STRATEGY_LOW_TO_HIGH, 0);
+
+	blt_copy_init(ex_xe, &ex_blt);
+	blt_copy_init(im_xe, &im_blt);
+
+	src = blt_create_object(&ex_blt, ex_reg, width, height, bpp, 0,
+				T_LINEAR, COMPRESSION_DISABLED, 0, true);
+	dst = blt_create_object(&im_blt, im_reg, width, height, bpp, 0,
+				T_LINEAR, COMPRESSION_DISABLED, 0, true);
+	blt_surface_fill_rect(ex_xe, src, width, height);
+
+	dmabuf = prime_handle_to_fd(ex_xe, src->handle);
+	ex_src = prime_fd_to_handle(im_xe, dmabuf);
+	im_src = calloc(1, sizeof(*im_src));
+
+	stride = width * 4;
+	blt_set_object(im_src, ex_src, src->size, ex_reg, 0, DEFAULT_PAT_INDEX,
+		       T_LINEAR, COMPRESSION_DISABLED, 0);
+	blt_set_geom(im_src, stride, 0, 0, width, height, 0, 0);
+	igt_assert(im_src->size == dst->size);
+
+	im_blt.color_depth = CD_32bit;
+	blt_set_copy_object(&im_blt.src, im_src);
+	blt_set_copy_object(&im_blt.dst, dst);
+
+	bb = xe_bo_create_flags(im_xe, 0, im_bb_size, im_reg);
+	blt_set_batch(&im_blt.bb, bb, im_bb_size, im_reg);
+
+	blt_fast_copy(im_xe, ctx, NULL, ahnd, &im_blt);
+
+	result = memcmp(src->ptr, im_blt.dst.ptr, src->size);
+
+	put_offset(ahnd, im_src->handle);
+	put_offset(ahnd, dst->handle);
+	put_offset(ahnd, bb);
+	intel_allocator_bind(ahnd, 0, 0);
+	blt_destroy_object(im_xe, im_src);
+	blt_destroy_object(im_xe, dst);
+	blt_destroy_object(ex_xe, src);
+	put_ahnd(ahnd);
+
+	igt_assert_f(!result, "source and destination surfaces differs!\n");
+}
+
+/**
+ * test_write - Write an imported buffer to an external GPU via dma-buf
+ * @ex_gpu: device providing the destination object
+ * @im_gpu: device doing the write
+ * @ex_reg: the destination region (on @ex_gpu) to copy to
+ * @im_reg: the source region (on @im_gpu) to copy from
+ *
+ */
+static void test_write(struct gpu_info *ex_gpu, struct gpu_info *im_gpu,
+		       uint32_t ex_reg, uint32_t im_reg)
+{
+	struct blt_copy_data im_blt = {};
+	struct blt_copy_data ex_blt = {};
+	struct blt_copy_object *dst;
+	struct blt_copy_object *im_dst;
+	struct blt_copy_object *src;
+	const uint32_t bpp = 32;
+	uint64_t im_bb_size = xe_get_default_alignment(im_gpu->fd);
+	uint64_t ahnd;
+	uint32_t bb;
+	uint32_t width = 1024, height = 1024;
+	int result;
+	uint32_t vm, exec_queue;
+	uint32_t ex_xe = ex_gpu->fd;
+	uint32_t im_xe = im_gpu->fd;
+	uint32_t ex_dst, dmabuf;
+	uint32_t stride;
+
+	struct drm_xe_engine_class_instance inst = {
+		.engine_class = DRM_XE_ENGINE_CLASS_COPY,
+	};
+	intel_ctx_t *ctx;
+
+	vm = xe_vm_create(im_xe, DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT, 0);
+	exec_queue = xe_exec_queue_create(im_xe, vm, &inst, 0);
+	ctx = intel_ctx_xe(im_xe, vm, exec_queue, 0, 0, 0);
+	ahnd = intel_allocator_open_full(im_xe, ctx->vm, 0, 0,
+					 INTEL_ALLOCATOR_SIMPLE,
+					 ALLOC_STRATEGY_LOW_TO_HIGH, 0);
+
+	blt_copy_init(ex_xe, &ex_blt);
+	blt_copy_init(im_xe, &im_blt);
+
+	dst = blt_create_object(&ex_blt, ex_reg, width, height, bpp, 0,
+				T_LINEAR, COMPRESSION_DISABLED, 0, true);
+	src = blt_create_object(&im_blt, im_reg, width, height, bpp, 0,
+				T_LINEAR, COMPRESSION_DISABLED, 0, true);
+	blt_surface_fill_rect(im_xe, src, width, height);
+
+	dmabuf = prime_handle_to_fd(ex_xe, dst->handle);
+	ex_dst = prime_fd_to_handle(im_xe, dmabuf);
+	im_dst = calloc(1, sizeof(*im_dst));
+
+	stride = width * 4;
+	blt_set_object(im_dst, ex_dst, dst->size, ex_reg, 0, DEFAULT_PAT_INDEX,
+		       T_LINEAR, COMPRESSION_DISABLED, 0);
+	blt_set_geom(im_dst, stride, 0, 0, width, height, 0, 0);
+	igt_assert(im_dst->size == src->size);
+
+	im_blt.color_depth = CD_32bit;
+	blt_set_copy_object(&im_blt.src, src);
+	blt_set_copy_object(&im_blt.dst, im_dst);
+
+	bb = xe_bo_create_flags(im_xe, 0, im_bb_size, im_reg);
+	blt_set_batch(&im_blt.bb, bb, im_bb_size, im_reg);
+
+	blt_fast_copy(im_xe, ctx, NULL, ahnd, &im_blt);
+
+	result = memcmp(dst->ptr, im_blt.src.ptr, src->size);
+
+	put_offset(ahnd, im_dst->handle);
+	put_offset(ahnd, src->handle);
+	put_offset(ahnd, bb);
+	intel_allocator_bind(ahnd, 0, 0);
+	blt_destroy_object(im_xe, src);
+	blt_destroy_object(im_xe, im_dst);
+	blt_destroy_object(ex_xe, dst);
+	put_ahnd(ahnd);
+
+	igt_assert_f(!result, "source and destination surfaces differs!\n");
+}
+
+static const char *p2p_path(int ex_reg, struct gpu_info *ex_gpu, struct gpu_info *im_gpu)
+{
+	return "-p2p";
+}
+
+static char *region_name(int xe, uint32_t region)
+{
+	char *name;
+	struct drm_xe_query_mem_region *memreg;
+	int r;
+	int len = 7;
+
+	/* 7 bytes including the NUL; snprintf truncates anything longer */
+	name = malloc(len);
+	igt_assert(name);
+
+	memreg = xe_mem_region(xe, region);
+
+	if (XE_IS_CLASS_VRAM(memreg))
+		r = snprintf(name, len, "%s%d",
+			     xe_region_name(region),
+			     memreg->instance);
+	else
+		r = snprintf(name, len, "%s",
+			     xe_region_name(region));
+
+	igt_assert(r > 0);
+
+	return name;
+}
+
+/**
+ * gpu_read - Set up a read from the exporting GPU to the importing GPU
+ * @ex_gpu: GPU that is exporting a buffer for read
+ * @im_gpu: GPU that is importing and reading the buffer
+ */
+static void gpu_read(struct gpu_info *ex_gpu, struct gpu_info *im_gpu)
+{
+	struct igt_collection *ex_regs, *im_regs;
+	int ex_reg, im_reg;
+	char *ex_name, *im_name;
+	const char *path;
+
+	for_each_variation_r(ex_regs, 1, ex_gpu->set) {
+		ex_reg = igt_collection_get_value(ex_regs, 0);
+		ex_name = region_name(ex_gpu->fd, ex_reg);
+
+		for_each_variation_r(im_regs, 1, im_gpu->set) {
+			im_reg = igt_collection_get_value(im_regs, 0);
+			im_name = region_name(im_gpu->fd, im_reg);
+
+			path = p2p_path(ex_reg, ex_gpu, im_gpu);
+			igt_dynamic_f("read-gpuA-%s-gpuB-%s%s", ex_name,
+				      im_name, path)
+				test_read(ex_gpu, im_gpu, ex_reg, im_reg);
+
+			free(im_name);
+		}
+		free(ex_name);
+	}
+}
+
+/**
+ * gpu_write - Set up a write from the importing GPU to the exporting GPU
+ * @ex_gpu: GPU that is exporting a buffer to be written
+ * @im_gpu: GPU that is importing and writing the buffer
+ */
+static void gpu_write(struct gpu_info *ex_gpu, struct gpu_info *im_gpu)
+{
+	struct igt_collection *ex_regs, *im_regs;
+	int ex_reg, im_reg;
+	char *ex_name, *im_name;
+	const char *path;
+
+	for_each_variation_r(ex_regs, 1, ex_gpu->set) {
+		ex_reg = igt_collection_get_value(ex_regs, 0);
+		ex_name = region_name(ex_gpu->fd, ex_reg);
+
+		for_each_variation_r(im_regs, 1, im_gpu->set) {
+			im_reg = igt_collection_get_value(im_regs, 0);
+			im_name = region_name(im_gpu->fd, im_reg);
+
+			path = p2p_path(ex_reg, ex_gpu, im_gpu);
+			igt_dynamic_f("write-gpuA-%s-gpuB-%s%s", ex_name,
+				      im_name, path)
+				test_write(ex_gpu, im_gpu, ex_reg, im_reg);
+
+			free(im_name);
+		}
+		free(ex_name);
+	}
+}
+
+#define DEFAULT_SIZE 0
+
+igt_main_args("", NULL, NULL, NULL, NULL)
+{
+	struct gpu_info gpus[2];
+	int gpu_cnt;
+
+	igt_fixture {
+		gpu_cnt = get_device_info(gpus, ARRAY_SIZE(gpus));
+		igt_skip_on(gpu_cnt < 2);
+	}
+
+	igt_describe("dmabuf gpu-gpu read");
+	igt_subtest_with_dynamic_f("read")
+		gpu_read(&gpus[0], &gpus[1]);
+
+	igt_describe("dmabuf gpu-gpu write");
+	igt_subtest_with_dynamic_f("write")
+		gpu_write(&gpus[0], &gpus[1]);
+
+	igt_fixture {
+		int cnt;
+
+		for (cnt = 0; cnt < gpu_cnt; cnt++)
+			drm_close_driver(gpus[cnt].fd);
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index facf60ccf..514f8ac85 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -302,6 +302,7 @@ intel_xe_progs = [
 	'xe_module_load',
 	'xe_noexec_ping_pong',
 	'xe_pat',
+	'xe_peer2peer',
 	'xe_pm',
 	'xe_pm_residency',
 	'xe_prime_self_import',
-- 
2.38.1