From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
To: igt-dev@lists.freedesktop.org
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Subject: [PATCH] tests/intel: Add xe_coredump test, v3.
Date: Tue, 11 Jun 2024 12:13:22 +0200 [thread overview]
Message-ID: <20240611101323.19444-1-maarten.lankhorst@linux.intel.com> (raw)
Add a simple test that forces a GPU hang and then reads the resulting
devcoredump file. Map a single userptr and BO, and dump the contents of
those.
Changes since v1:
- Almost completely rewrite test for readability, based on feedback.
Changes since v2:
- Remove retrying opening fd, 1s wait was missing and test passed without.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
tests/intel/xe_coredump.c | 251 ++++++++++++++++++++++++++++++++++++++
tests/meson.build | 1 +
2 files changed, 252 insertions(+)
create mode 100644 tests/intel/xe_coredump.c
diff --git a/tests/intel/xe_coredump.c b/tests/intel/xe_coredump.c
new file mode 100644
index 000000000..938b718db
--- /dev/null
+++ b/tests/intel/xe_coredump.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+/**
+ * TEST: Check devcoredump functionality
+ * Category: Software building block
+ * Sub-category: devcoredump
+ * Run type: BAT
+ * Functionality: Error dumping and readout.
+ */
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <glob.h>
+#include <string.h>
+#include <wchar.h>
+#include <sys/stat.h>
+
+#include "igt.h"
+#include "igt_device.h"
+#include "igt_io.h"
+#include "igt_syncobj.h"
+#include "igt_sysfs.h"
+
+#include "intel_pat.h"
+
+#include "xe_drm.h"
+#include "xe/xe_ioctl.h"
+#include "xe/xe_query.h"
+
+#ifndef DRM_XE_VM_BIND_FLAG_DUMPABLE
+#define DRM_XE_VM_BIND_FLAG_DUMPABLE (1<<3)
+#endif
+
+static struct xe_device *xe;
+static uint32_t batch_bo;
+static uint32_t *batch;
+static void *userptr;
+static uint32_t vm;
+static int sysfd;
+
+#define MAX_N_ENGINES 32
+
+static void tryclear_hang(void)
+{
+ int fd = openat(sysfd, "devcoredump/data", O_RDWR);
+ char buf[256];
+
+ if (fd < 0)
+ return;
+
+ while (read(fd, buf, sizeof(buf)) > 0)
+ { }
+ write(fd, "1", 1);
+ close(fd);
+}
+
+/*
+ * Helper to read and clear devcore. We want to read it completely to ensure
+ * we catch any kernel side regressions like:
+ * https://gitlab.freedesktop.org/drm/msm/-/issues/20
+ */
+static void
+read_and_clear_hang(void)
+{
+ char buf[0x1000];
+ int fd = openat(sysfd, "devcoredump/data", O_RDWR);
+ igt_assert(fd >= 0);
+
+ /*
+ * We want to read the entire file but we can throw away the
+ * contents.. we just want to make sure that we exercise the
+ * kernel side codepaths hit when reading the devcore from
+ * sysfs
+ */
+ igt_debug("---- begin coredump ----\n");
+ while (1) {
+ ssize_t ret;
+
+ ret = igt_readn(fd, buf, sizeof(buf) - 1);
+ igt_assert(ret >= 0);
+ if (ret == 0)
+ break;
+ buf[ret] = '\0';
+ igt_debug("%s", buf);
+ }
+
+ igt_debug("---- end coredump ----\n");
+
+ /* Clear the devcore: */
+ igt_writen(fd, "1", 1);
+
+ close(fd);
+}
+
+static void free_execqueue(void)
+{
+ int fd = xe->fd;
+ xe_vm_destroy(fd, vm);
+ vm = 0;
+ gem_close(fd, batch_bo);
+ munmap(batch, xe->default_alignment);
+ munmap(userptr, xe->default_alignment);
+ batch = userptr = NULL;
+}
+
+static void recreate_execqueue(bool dumpable)
+{
+ struct drm_xe_sync sync = {
+ .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
+ .flags = DRM_XE_SYNC_FLAG_SIGNAL,
+ };
+ struct drm_xe_vm_bind_op bind_ops[2] = { };
+ int fd = xe->fd;
+ uint32_t *ptr;
+ uint64_t offset = xe->default_alignment - 4;
+
+ tryclear_hang();
+
+ if (vm)
+ free_execqueue();
+
+ vm = xe_vm_create(fd, 0, 0);
+ batch_bo = xe_bo_create(fd, vm, xe->default_alignment, system_memory(fd), 0);
+ ptr = batch = xe_bo_map(xe->fd, batch_bo, xe->default_alignment);
+
+ memset(batch, 0, xe->default_alignment);
+ *(ptr++) = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_GTE_SDD;
+ *(ptr++) = 1;
+ *(ptr++) = offset >> 32;
+ *(ptr++) = offset;
+ *(ptr++) = MI_BATCH_BUFFER_END;
+
+ userptr = mmap(0, xe->default_alignment, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+ wmemset(userptr, 0xf1234567, xe->default_alignment / sizeof(wchar_t));
+
+ bind_ops[0].op = DRM_XE_VM_BIND_OP_MAP;
+ bind_ops[0].obj = batch_bo;
+ bind_ops[0].addr = 0;
+
+ bind_ops[1].op = DRM_XE_VM_BIND_OP_MAP_USERPTR;
+ bind_ops[1].userptr = (size_t)userptr;
+ bind_ops[1].addr = 1ULL << 40ULL;
+
+ if (dumpable)
+ bind_ops[0].flags = bind_ops[1].flags = DRM_XE_VM_BIND_FLAG_DUMPABLE;
+ bind_ops[0].range = bind_ops[1].range = xe->default_alignment;
+ bind_ops[0].pat_index = bind_ops[1].pat_index = intel_get_pat_idx_wb(fd);
+
+ sync.handle = syncobj_create(fd, 0);
+ xe_vm_bind_array(fd, vm, 0, bind_ops, ARRAY_SIZE(bind_ops), &sync, 1);
+ syncobj_wait(fd, &sync.handle, 1, INT64_MAX, 0, NULL);
+ syncobj_destroy(fd, sync.handle);
+}
+
+static uint32_t hang_engine(struct drm_xe_engine_class_instance *hwe)
+{
+ uint32_t engine;
+ struct drm_xe_sync sync = {
+ .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
+ .flags = DRM_XE_SYNC_FLAG_SIGNAL,
+ .handle = syncobj_create(xe->fd, 0),
+ };
+
+ engine = xe_exec_queue_create(xe->fd, vm, hwe, 0);
+ xe_exec_sync(xe->fd, engine, 0, &sync, 1);
+
+ return sync.handle;
+}
+
+static void test_hang_one(void)
+{
+ uint32_t syncobj = hang_engine(&xe_engine(xe->fd, 0)->instance);
+
+ syncobj_wait(xe->fd, &syncobj, 1, INT64_MAX, 0, NULL);
+ syncobj_destroy(xe->fd, syncobj);
+
+ read_and_clear_hang();
+}
+
+/**
+ * SUBTEST: basic
+ * Description: Read out a full dumped VM.
+ * Test category: functionality test
+ */
+static void basic(void)
+{
+ recreate_execqueue(true);
+ test_hang_one();
+}
+
+/**
+ * SUBTEST: empty-vm
+ * Description: Create an error dump without anything in VM to dump.
+ * Test category: functionality test
+ */
+static void empty_vm(void)
+{
+ recreate_execqueue(false);
+ test_hang_one();
+}
+
+/**
+ * SUBTEST: all-simultaneously
+ * Description: Hang all engines at the same time, read out the dump.
+ * Test category: robustness test
+ */
+static void all_simultaneously(void)
+{
+ uint32_t syncobj[MAX_N_ENGINES], i = 0;
+ struct drm_xe_engine_class_instance *hwe;
+
+ recreate_execqueue(true);
+ xe_for_each_engine(xe->fd, hwe)
+ syncobj[i++] = hang_engine(hwe);
+
+ syncobj_wait(xe->fd, syncobj, i, INT64_MAX, 0, NULL);
+ while (i--)
+ syncobj_destroy(xe->fd, syncobj[i]);
+
+ read_and_clear_hang();
+}
+
+igt_main
+{
+ igt_fixture {
+ struct stat stat;
+ int fd = drm_open_driver_render(DRIVER_XE);
+ char str[256];
+ xe = xe_device_get(fd);
+
+ igt_assert_eq(fstat(fd, &stat), 0);
+ sprintf(str, "/sys/dev/char/%ld:%ld/device", stat.st_rdev >> 8, stat.st_rdev & 0xff);
+ sysfd = open(str, O_DIRECTORY);
+ igt_assert(sysfd >= 0);
+ }
+
+ igt_describe("Test that hw fault coredump readout works");
+ igt_subtest("basic")
+ basic();
+
+ igt_describe("Hang all engines simultaneously");
+ igt_subtest("all-simultaneously")
+ all_simultaneously();
+
+ igt_describe("Ensure that snapshot works without anything to capture");
+ igt_subtest("empty-vm")
+ empty_vm();
+}
diff --git a/tests/meson.build b/tests/meson.build
index 758ae090c..f611e2e2c 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -280,6 +280,7 @@ intel_xe_progs = [
'xe_compute',
'xe_compute_preempt',
'xe_copy_basic',
+ 'xe_coredump',
'xe_dma_buf_sync',
'xe_debugfs',
'xe_drm_fdinfo',
--
2.43.0
next reply other threads:[~2024-06-11 10:20 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-11 10:13 Maarten Lankhorst [this message]
2024-06-11 11:56 ` ✓ CI.xeBAT: success for tests/intel: Add xe_coredump test, v3 Patchwork
2024-06-11 12:06 ` ✓ Fi.CI.BAT: " Patchwork
2024-06-11 16:13 ` ✗ CI.xeFULL: failure " Patchwork
2024-06-11 16:21 ` ✗ Fi.CI.IGT: " Patchwork
2024-06-11 22:17 ` [PATCH] " Matthew Brost
2024-06-13 23:27 ` John Harrison
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240611101323.19444-1-maarten.lankhorst@linux.intel.com \
--to=maarten.lankhorst@linux.intel.com \
--cc=igt-dev@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox