From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by gabe.freedesktop.org (Postfix) with ESMTPS id 0E5C710E46E for ; Thu, 6 Jul 2023 16:00:30 +0000 (UTC) From: Andrzej Hajda To: igt-dev@lists.freedesktop.org Date: Thu, 6 Jul 2023 18:00:14 +0200 Message-Id: <20230706160014.1027512-1-andrzej.hajda@intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [igt-dev] [PATCH i-g-t] i915/gem_exec_await: Avoid DG2 conflicts List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson , Chris Wilson , Nirmoy Das Errors-To: igt-dev-bounces@lists.freedesktop.org Sender: "igt-dev" List-ID: From: Chris Wilson DG2 is restricted in what contexts/engines can be run concurrently, if we submit a non-preemptible context on both rcs/ccs it will only run one at a time. Progress (heartbeats) along ccs will be blocked by rcs, and vice versa. This is independent of the ccs switch holdout w/a. Since this is not required for constructing a wide set of active fences (a fence is active until it has been signaled, whether it is running on HW or waiting to run is irrelevant to the signal state), refactor the context construction to be favourable for DG2. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5892 Signed-off-by: Chris Wilson [ahajda: adjust to upstream driver] Signed-off-by: Andrzej Hajda --- tests/i915/gem_exec_await.c | 180 ++++++++++++++++-------------------- 1 file changed, 80 insertions(+), 100 deletions(-) diff --git a/tests/i915/gem_exec_await.c b/tests/i915/gem_exec_await.c index 53b7bac2f96..b87adcb4a18 100644 --- a/tests/i915/gem_exec_await.c +++ b/tests/i915/gem_exec_await.c @@ -25,12 +25,23 @@ #include #include +#include "drmtest.h" #include "i915/gem.h" #include "i915/gem_create.h" -#include "igt.h" +#include "i915/gem_engine_topology.h" +#include "i915/gem_mman.h" +#include "i915/gem_submission.h" +#include "i915/gem_vm.h" +#include "igt_aux.h" +#include "igt_core.h" #include "igt_rand.h" #include "igt_sysfs.h" +#include "igt_types.h" #include "igt_vgem.h" +#include "ioctl_wrappers.h" +#include "intel_chipset.h" +#include "intel_ctx.h" +#include "intel_gpu_commands.h" /** * TEST: gem exec await * Category: Infrastructure @@ -66,7 +77,7 @@ static void xchg_obj(void *array, unsigned i, unsigned j) } #define CONTEXTS 0x1 -static void wide(int fd, const intel_ctx_t *ctx, int ring_size, +static void wide(int fd, intel_ctx_cfg_t *cfg, int ring_size, int timeout, unsigned int flags) { const struct intel_execution_engine2 *engine; @@ -75,7 +86,6 @@ static void wide(int fd, const intel_ctx_t *ctx, int ring_size, struct { struct drm_i915_gem_exec_object2 *obj; struct drm_i915_gem_exec_object2 exec[2]; - struct drm_i915_gem_relocation_entry reloc; struct drm_i915_gem_execbuffer2 execbuf; const intel_ctx_t *ctx; uint32_t *cmd; @@ -83,9 +93,13 @@ static void wide(int fd, const intel_ctx_t *ctx, int ring_size, struct drm_i915_gem_exec_object2 *obj; struct drm_i915_gem_execbuffer2 execbuf; unsigned engines[I915_EXEC_RING_MASK + 1], nengine; + const intel_ctx_t *ctx; unsigned long count; double time; - uint64_t ahnd = get_reloc_ahnd(fd, 0); /* just offset provider */ + + __gem_vm_create(fd, &cfg->vm); + if (__intel_ctx_create(fd, cfg, &ctx)) + ctx = intel_ctx_0(fd); nengine = 0; for_each_ctx_engine(fd, ctx, engine) { @@ -102,7 +116,7 @@ static void wide(int fd, const intel_ctx_t *ctx, int ring_size, igt_assert(exec); igt_require_memory(nengine*(2 + ring_size), 4096, CHECK_RAM); - obj = calloc(nengine*ring_size + 1, sizeof(*obj)); + obj = calloc(nengine * (ring_size + 1) + 1, sizeof(*obj)); igt_assert(obj); for (unsigned e = 0; e < nengine; e++) { @@ -111,69 +125,63 @@ static void wide(int fd, const intel_ctx_t *ctx, int ring_size, for (unsigned n = 0; n < ring_size; n++) { exec[e].obj[n].handle = gem_create(fd, 4096); exec[e].obj[n].flags = EXEC_OBJECT_WRITE; - exec[e].obj[n].offset = get_offset(ahnd, exec[e].obj[n].handle, - 4096, 0); - if (ahnd) - exec[e].obj[n].flags |= EXEC_OBJECT_PINNED; - - obj[e*ring_size + n].handle = exec[e].obj[n].handle; - obj[e*ring_size + n].offset = exec[e].obj[n].offset; + obj[e * ring_size + n] = exec[e].obj[n]; } exec[e].execbuf.buffers_ptr = to_user_pointer(exec[e].exec); - exec[e].execbuf.buffer_count = 1; - exec[e].execbuf.flags = (engines[e] | - I915_EXEC_NO_RELOC | - I915_EXEC_HANDLE_LUT); + exec[e].execbuf.buffer_count = 2; + exec[e].execbuf.flags = engines[e]; + exec[e].execbuf.rsvd1 = ctx->id; if (flags & CONTEXTS) { - exec[e].ctx = intel_ctx_create(fd, &ctx->cfg); + exec[e].ctx = intel_ctx_create(fd, cfg); exec[e].execbuf.rsvd1 = exec[e].ctx->id; - } else { - exec[e].execbuf.rsvd1 = ctx->id; } - exec[e].exec[0].handle = gem_create(fd, 4096); - exec[e].exec[0].offset = get_offset(ahnd, exec[e].exec[0].handle, - 4096, 0); - if (ahnd) - exec[e].exec[0].flags = EXEC_OBJECT_PINNED; - - exec[e].cmd = gem_mmap__device_coherent(fd, exec[e].exec[0].handle, - 0, 4096, PROT_WRITE); - - gem_set_domain(fd, exec[e].exec[0].handle, - I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC); - exec[e].cmd[0] = MI_BATCH_BUFFER_END; - - gem_execbuf(fd, &exec[e].execbuf); - exec[e].exec[1] = exec[e].exec[0]; - exec[e].execbuf.buffer_count = 2; - - exec[e].reloc.target_handle = 1; /* recurse */ - exec[e].reloc.offset = sizeof(uint32_t); - exec[e].reloc.read_domains = I915_GEM_DOMAIN_COMMAND; - if (gen < 4) - exec[e].reloc.delta = 1; - - exec[e].exec[1].relocs_ptr = to_user_pointer(&exec[e].reloc); - exec[e].exec[1].relocation_count = !ahnd ? 1 : 0; + exec[e].exec[1].handle = gem_create(fd, 4096); + obj[nengine * ring_size + e] = exec[e].exec[1]; } - obj[nengine*ring_size].handle = gem_create(fd, 4096); - gem_write(fd, obj[nengine*ring_size].handle, 0, &bbe, sizeof(bbe)); - - obj[nengine*ring_size].offset = get_offset(ahnd, obj[nengine*ring_size].handle, - 4096, 0); - if (ahnd) - obj[nengine*ring_size].flags |= EXEC_OBJECT_PINNED; + obj[nengine * (ring_size + 1)].handle = gem_create(fd, 4096); + gem_write(fd, obj[nengine * (ring_size + 1)].handle, 0, + &bbe, sizeof(bbe)); memset(&execbuf, 0, sizeof(execbuf)); - execbuf.buffers_ptr = to_user_pointer(&obj[nengine*ring_size]); - execbuf.buffer_count = 1; - gem_execbuf(fd, &execbuf); /* tag the object as a batch in the GTT */ execbuf.buffers_ptr = to_user_pointer(obj); - execbuf.buffer_count = nengine*ring_size + 1; + execbuf.buffer_count = nengine * (ring_size + 1) + 1; + execbuf.rsvd1 = ctx->id; + gem_execbuf(fd, &execbuf); /* tag the object as a batch in the GTT */ + for (unsigned e = 0; e < nengine; e++) { + uint64_t address; + uint32_t *cs; + + for (unsigned n = 0; n < ring_size; n++) { + obj[e * ring_size + n].flags |= EXEC_OBJECT_PINNED; + exec[e].obj[n] = obj[e * ring_size + n]; + } + exec[e].exec[1] = obj[nengine * ring_size + e]; + exec[e].exec[1].flags |= EXEC_OBJECT_PINNED; + address = exec[e].exec[1].offset; + + exec[e].cmd = gem_mmap__device_coherent(fd, exec[e].exec[1].handle, + 0, 4096, PROT_WRITE); + cs = exec[e].cmd; + + *cs++ = MI_NOOP; + if (gen >= 8) { + *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; + *cs++ = address; + *cs++ = address >> 32; + } else if (gen >= 6) { + *cs++ = MI_BATCH_BUFFER_START | 1 << 8; + *cs++ = address; + } else { + *cs++ = MI_BATCH_BUFFER_START | 2 << 6; + if (gen < 4) + address |= 1; + *cs++ = address; + } + } intel_detect_and_clear_missed_interrupts(fd); @@ -182,42 +190,22 @@ static void wide(int fd, const intel_ctx_t *ctx, int ring_size, igt_until_timeout(timeout) { struct timespec start, now; for (unsigned e = 0; e < nengine; e++) { - uint64_t address; - int i; - if (flags & CONTEXTS) { intel_ctx_destroy(fd, exec[e].ctx); - exec[e].ctx = intel_ctx_create(fd, &ctx->cfg); + exec[e].ctx = intel_ctx_create(fd, cfg); exec[e].execbuf.rsvd1 = exec[e].ctx->id; } - exec[e].reloc.presumed_offset = exec[e].exec[1].offset; - address = (exec[e].reloc.presumed_offset + - exec[e].reloc.delta); gem_set_domain(fd, exec[e].exec[1].handle, I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC); + exec[e].cmd[0] = MI_ARB_CHECK; - i = 0; - exec[e].cmd[i] = MI_BATCH_BUFFER_START; - if (gen >= 8) { - exec[e].cmd[i] |= 1 << 8 | 1; - exec[e].cmd[++i] = address; - exec[e].cmd[++i] = address >> 32; - } else if (gen >= 6) { - exec[e].cmd[i] |= 1 << 8; - exec[e].cmd[++i] = address; - } else { - exec[e].cmd[i] |= 2 << 6; - exec[e].cmd[++i] = address; - } - - exec[e].exec[0] = obj[nengine*ring_size]; + exec[e].exec[0] = obj[nengine * (ring_size + 1)]; gem_execbuf(fd, &exec[e].execbuf); for (unsigned n = 0; n < ring_size; n++) { exec[e].exec[0] = exec[e].obj[n]; gem_execbuf(fd, &exec[e].execbuf); - exec[e].obj[n].offset = exec[e].exec[0].offset; } } @@ -225,10 +213,7 @@ static void wide(int fd, const intel_ctx_t *ctx, int ring_size, clock_gettime(CLOCK_MONOTONIC, &start); for (unsigned e = 0; e < nengine; e++) { - execbuf.flags = (engines[e] | - I915_EXEC_NO_RELOC | - I915_EXEC_HANDLE_LUT); - execbuf.rsvd1 = ctx->id; + execbuf.flags = engines[e]; gem_execbuf(fd, &execbuf); } clock_gettime(CLOCK_MONOTONIC, &now); @@ -245,43 +230,40 @@ static void wide(int fd, const intel_ctx_t *ctx, int ring_size, igt_info("%s: %'lu cycles: %.3fus\n", __func__, count, time*1e6 / count); - gem_close(fd, obj[nengine*ring_size].handle); + for (unsigned n = 0; n < nengine * (ring_size + 1) + 1; n++) + gem_close(fd, obj[n].handle); free(obj); for (unsigned e = 0; e < nengine; e++) { if (flags & CONTEXTS) intel_ctx_destroy(fd, exec[e].ctx); - for (unsigned n = 0; n < ring_size; n++) { - gem_close(fd, exec[e].obj[n].handle); - put_offset(ahnd, exec[e].obj[n].handle); - } - free(exec[e].obj); - munmap(exec[e].cmd, 4096); - gem_close(fd, exec[e].exec[1].handle); - put_offset(ahnd, exec[e].exec[1].handle); + free(exec[e].obj); } free(exec); - put_ahnd(ahnd); + + intel_ctx_destroy(fd, ctx); + __gem_vm_destroy(fd, cfg->vm); + cfg->vm = 0; } #define TIMEOUT 20 igt_main { + intel_ctx_cfg_t cfg; int ring_size = 0; - int device = -1; - const intel_ctx_t *ctx; + igt_fd_t(device); igt_fixture { device = drm_open_driver(DRIVER_INTEL); igt_require_gem(device); gem_submission_print_method(device); - ctx = intel_ctx_create_all_physical(device); + cfg = intel_ctx_cfg_all_physical(device); - ring_size = gem_submission_measure(device, &ctx->cfg, ALL_ENGINES); + ring_size = gem_submission_measure(device, &cfg, ALL_ENGINES); igt_info("Ring size: %d batches\n", ring_size); igt_require(ring_size > 0); @@ -290,16 +272,14 @@ igt_main } igt_subtest("wide-all") - wide(device, ctx, ring_size, TIMEOUT, 0); + wide(device, &cfg, ring_size, TIMEOUT, 0); igt_subtest("wide-contexts") { gem_require_contexts(device); - wide(device, ctx, ring_size, TIMEOUT, CONTEXTS); + wide(device, &cfg, ring_size, TIMEOUT, CONTEXTS); } igt_fixture { igt_stop_hang_detector(); - intel_ctx_destroy(device, ctx); - drm_close_driver(device); } } -- 2.34.1