From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga12.intel.com (mga12.intel.com [192.55.52.136]) by gabe.freedesktop.org (Postfix) with ESMTPS id B013D10E63D for ; Fri, 2 Jun 2023 10:09:44 +0000 (UTC) Message-ID: Date: Fri, 2 Jun 2023 12:09:40 +0200 MIME-Version: 1.0 Content-Language: en-US To: Maarten Lankhorst , igt-dev@lists.freedesktop.org References: <20230529124824.1876093-1-maarten.lankhorst@linux.intel.com> <60097a7b-c74c-ee47-59ca-b32e85a1ac65@linux.intel.com> <5f4f3f11-ff4b-c52e-7aa2-72cb670c2cf0@linux.intel.com> From: =?UTF-8?Q?Thomas_Hellstr=c3=b6m?= In-Reply-To: <5f4f3f11-ff4b-c52e-7aa2-72cb670c2cf0@linux.intel.com> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 8bit Subject: Re: [igt-dev] [PATCH i-g-t] tests/xe: Add a test for corner cases of eviction. List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: igt-dev-bounces@lists.freedesktop.org Sender: "igt-dev" List-ID: On 5/31/23 20:18, Maarten Lankhorst wrote: > On 2023-05-31 11:53, Thomas Hellström wrote: >> Hi, >> >> >> On 5/30/23 16:55, Maarten Lankhorst wrote: >>> Cc >>> >>> On 2023-05-29 14:48, Maarten Lankhorst wrote: >>>> Gitlab issues 194 and 239 show some corner cases. In particular, >>>> signal handling, pt eviction and losing track of external objects >>>> on the list. >>>> >>>> The patch series to fix those was sent as >>>> https://patchwork.freedesktop.org/series/118428/ >>>> >>>> But because it's tricky to reproduce those issues, I had to add some >>>> custom igt's to handle those. >>>> >>>> Signed-off-by: Maarten Lankhorst >> Looks good overall, but It would be great with some more documentation so that if a test fails, it's easier to understand what is failing, see below. >> >>>> --- >>>>   tests/meson.build     |   1 + >>>>   tests/xe/xe_evicted.c | 313 ++++++++++++++++++++++++++++++++++++++++++ >>>>   2 files changed, 314 insertions(+) >>>>   create mode 100644 tests/xe/xe_evicted.c >>>> >>>> diff --git a/tests/meson.build b/tests/meson.build >>>> index 6551194fe..166937b6c 100644 >>>> --- a/tests/meson.build >>>> +++ b/tests/meson.build >>>> @@ -248,6 +248,7 @@ xe_progs = [ >>>>       'xe_dma_buf_sync', >>>>       'xe_debugfs', >>>>       'xe_evict', >>>> +    'xe_evicted', >>>>       'xe_exec_balancer', >>>>       'xe_exec_basic', >>>>       'xe_exec_compute_mode', >>>> diff --git a/tests/xe/xe_evicted.c b/tests/xe/xe_evicted.c >>>> new file mode 100644 >>>> index 000000000..3f5e6a82c >>>> --- /dev/null >>>> +++ b/tests/xe/xe_evicted.c >>>> @@ -0,0 +1,313 @@ >>>> +// SPDX-License-Identifier: MIT >>>> +/* >>>> + * Copyright © 2023 Intel Corporation >>>> + */ >>>> + >>>> +/** >>>> + * TEST: Test various eviction and ENOMEM corner cases. >>>> + * Category: Software building block >>>> + * Sub-category: VM_BIND / eviction >>>> + * Test category: functionality test >>>> + */ >>>> + >>>> +#include "igt.h" >>>> +#include "lib/igt_syncobj.h" >>>> +#include "lib/intel_reg.h" >>>> +#include "xe_drm.h" >>>> + >>>> +#include "xe/xe_ioctl.h" >>>> +#include "xe/xe_query.h" >>>> +#include "xe/xe_spin.h" >>>> +#include >>>> + >>>> +static void evict_mem(struct xe_device *xe) >>>> +{ >>>> +    if (xe->has_vram) >>>> +        igt_debugfs_write(xe->fd, "vram0_evict", "1"); >>>> +    else >>>> +        igt_debugfs_write(xe->fd, "gtt_evict", "1"); >>>> +} >>>> + >>>> +static void execstress(struct xe_device *xe, uint32_t engine, uint32_t addr) >>>> +{ >>>> +    struct timespec start, end; >>>> + >>>> +    igt_fork(child, 2) { >>>> +        struct drm_xe_sync sync = { >>>> +            .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, >>>> +            .handle = syncobj_create(xe->fd, 0), >>>> +        }; >>>> + >>>> +        clock_gettime(CLOCK_MONOTONIC, &start); >>>> + >>>> +        do { >>>> +            if (!child) >>>> +                evict_mem(xe); >>>> +            else >>>> +                xe_exec_sync(xe->fd, engine, addr, &sync, 1); >>>> + >>>> +            clock_gettime(CLOCK_MONOTONIC, &end); >>>> +        } while (end.tv_sec - start.tv_sec <= 3); >>>> + >>>> +        if (child) >>>> +            igt_assert(syncobj_wait(xe->fd, &sync.handle, 1, INT64_MAX, 0, NULL)); >>>> +        syncobj_destroy(xe->fd, sync.handle); >>>> +    } >>>> + >>>> +    igt_waitchildren(); >>>> + >>>> +    /* Leave function in a known state */ >>>> +    evict_mem(xe); >>>> +} >>>> + >>>> +/* See issue 194, unbinding first VMA may cause BO not to be locked correctly */ >>>> +static void test_multimap(struct xe_device *xe, bool external) >>>> +{ >>>> +    uint64_t size = xe->default_alignment; >>>> +    uint32_t vm = xe_vm_create(xe->fd, DRM_XE_VM_CREATE_SCRATCH_PAGE, 0); >>>> +    uint32_t bo = xe_bo_create(xe->fd, 0, external ? 0 : vm, size); >>>> +    uint32_t engine = xe_engine_create_class(xe->fd, vm, DRM_XE_ENGINE_CLASS_COPY); >>>> +    uint32_t *map = xe_bo_map(xe->fd, bo, size); >>>> + >>>> +    *map = MI_BATCH_BUFFER_END; >>>> +    munmap(map, size); >>>> + >>>> +    /* Create 2 mappings */ >>>> +    xe_vm_bind_sync(xe->fd, vm, bo, 0, 0, size); >>>> +    xe_vm_bind_sync(xe->fd, vm, bo, 0, size, size); >>>> + >>>> +    execstress(xe, engine, size); >>>> + >>>> +    /* Unbind the first one, to possibly confuse stuff */ >>>> +    xe_vm_unbind_sync(xe->fd, vm, 0, 0, size); >>>> + >>>> +    execstress(xe, engine, size); >>>> + >>>> +    /* Teardown.. */ >>>> +    xe_engine_destroy(xe->fd, engine); >>>> +    xe_vm_destroy(xe->fd, vm); >>>> +    gem_close(xe->fd, bo); >>>> +} >>>> + >>>> +static uint64_t avail_vram(struct xe_device **xe) >>>> +{ >>>> +    int fd = (*xe)->fd; >>>> +    int region_idx; >>>> + >>>> +    /* Refresh to get up-to-date values */ >>>> +    xe_device_put(fd); >>>> +    *xe = xe_device_get(fd); >>>> + >>>> +    region_idx = ffs((*xe)->gts->gts[0].native_mem_regions) - 1; >>>> + >>>> +    return (*xe)->mem_usage->regions[region_idx].total_size - >>>> +           (*xe)->mem_usage->regions[region_idx].used; >>>> +} >>>> + >>>> +static void test_pt_eviction(struct xe_device *xe, bool external) >> Could we have an overall description on what this test does and how it does it? > I put a small description in the test comments, basically try to force a pt eviction. >>>> +{ >>>> +    uint32_t bo, i; >>>> +    uint64_t size, full_size, full_addr, addr, avail; >>>> +    uint32_t vm; >>>> +    uint32_t engine; >>>> +    uint32_t *map; >>>> +    struct drm_xe_exec exec = {}; >>>> +    igt_require(xe->has_vram); >>>> + >>>> +    vm = xe_vm_create(xe->fd, 0, 0); >>>> +    engine = xe_engine_create_class(xe->fd, vm, DRM_XE_ENGINE_CLASS_COPY); >>>> + >>>> +    /* Refresh xe_device size after creating vm + engine */ >>>> +    evict_mem(xe); >>>> +    full_size = (avail_vram(&xe) - (2 << 20ULL)) & ~((2 << 20ULL) - 1); >> And what we are doing here and what the magical shifts come from? >  Should probably add a define for 2 megabyte instead. 1 << 20 is 1 mb. :) >>>> + >>>> +    bo = xe_bo_create(xe->fd, 0, external ? 0 : vm, full_size); >>>> + >>>> +    /* Only map what we need.. */ >>>> +    map = xe_bo_map(xe->fd, bo, xe->default_alignment); >>>> +    *map = MI_BATCH_BUFFER_END; >>>> +    munmap(map, xe->default_alignment); >>>> + >>>> +    /* Stuff bo at the end, so we can use start for partial bo's */ >>>> +    full_addr = (1ULL << (uint64_t)xe_va_bits(xe->fd)) - full_size; >>>> +    xe_vm_bind_sync(xe->fd, vm, bo, 0, full_addr, full_size); >>>> + >>>> +    exec.engine_id = engine; >>>> +    exec.num_batch_buffer = 1; >>>> +    exec.address = full_addr; >>>> + >>>> +    /* Now map partially at 0, and see how many pagetables we can still fill.. */ >>>> +    addr = 0; >>>> +    size = xe->default_alignment; >>>> +    xe_vm_bind_sync(xe->fd, vm, bo, 0, addr, size); >>>> + >>>> +    xe_exec_sync(xe->fd, engine, full_addr, NULL, 0); >>>> + >>>> +    avail = full_size >> 12; >>>> +    while (1) { >>>> +        uint64_t new_avail = avail_vram(&xe) >> 12; >>>> + >>>> +        if (new_avail >= avail) >>>> +            igt_info("TTM delayed destroy may have freed some memory\n"); >>>> +        avail = new_avail; >>>> + >>>> +        igt_info("%"PRIu64" available 4K pages left\n", avail); >>>> +        if (!avail) >>>> +            break; >>>> + >>>> +        if (avail == 1) { >>>> +            xe_vm_bind_sync(xe->fd, vm, bo, 0, 2 << 20ULL, size); >>>> +            break; >>>> +        } >>>> + >>>> +        addr += 1 << 30ULL; >>>> + >>>> +        /* Remove 1 for second level PT */ >>>> +        avail--; >>>> + >>>> +        for (i = 0; i < min((uint64_t)512, avail); i++) { >>>> +            uint64_t this_addr = addr + (i << 21); >>>> + >>>> +            xe_vm_bind_sync(xe->fd, vm, bo, 0, this_addr, size); >>>> +        } >>>> +    } >>>> + >>>> +    /* Verify that with 0 memory available, exec still completes */ >>>> +    xe_exec_sync(xe->fd, engine, full_addr, NULL, 0); >>>> + >>>> +    /* Bind 1 more to go over the edge, this should never succeed.. */ >>>> +    if (__xe_vm_bind(xe->fd, vm, 0, bo, 0, 4 << 20ULL, size, >>>> +             XE_VM_BIND_OP_MAP, NULL, 0, 0, 0) == 0) { >>>> + >>>> +        /* Boom! */ >>>> +        igt_assert(igt_ioctl(xe->fd, DRM_IOCTL_XE_EXEC, &exec) < 0 && >>>> +               errno == ENOMEM); >>>> + >>>> +        xe_vm_unbind_sync(xe->fd, vm, 0, 4 << 20ULL, size); >>>> +    } else { >>>> +        igt_assert(errno == ENOMEM); >>>> +    } >>>> + >>>> +    /* >>>> +     * After evicting, we may end up creating new page tables, >>>> +     * so this should fail.. >>>> +     */ >>>> +    evict_mem(xe); >>>> +    igt_assert(igt_ioctl(xe->fd, DRM_IOCTL_XE_EXEC, &exec) < 0 && >>>> +           errno == ENOMEM); >>>> + >>>> +    /* Cleanup */ >>>> +    xe_engine_destroy(xe->fd, engine); >>>> +    xe_vm_destroy(xe->fd, vm); >>>> +    gem_close(xe->fd, bo); >>>> +} >>>> + >>>> +/* Test OOM with signal handler enabled, to mimic X.org (issue #239) */ >> Same thing here, what's done and how is it done? Are we actually getting a signal here, or just wait interruptible and may or may not get a signal? > We attempt to cause a signal through igt_while_interruptible, it changes the igt_ioctl pointer from drmIoctl to sig_ioctl, > > which attempts to cause as many EINTR's as possible. That's why igt uses igt_ioctl internally and not drmIoctl. Ah, ok. Thanks for the explanation. /Thomas > > >> Thanks, >> >> Thomas > ~Maarten