* [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes.
@ 2024-01-12 12:40 Maarten Lankhorst
2024-01-12 12:40 ` [PATCH 2/2] tests/intel: Add xe_coredump test Maarten Lankhorst
` (3 more replies)
0 siblings, 4 replies; 8+ messages in thread
From: Maarten Lankhorst @ 2024-01-12 12:40 UTC (permalink / raw)
To: igt-dev
Required for devcoredump test.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
include/drm-uapi/xe_drm.h | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/include/drm-uapi/xe_drm.h b/include/drm-uapi/xe_drm.h
index bacdca787..0f975f57f 100644
--- a/include/drm-uapi/xe_drm.h
+++ b/include/drm-uapi/xe_drm.h
@@ -3,8 +3,8 @@
* Copyright © 2023 Intel Corporation
*/
-#ifndef _XE_DRM_H_
-#define _XE_DRM_H_
+#ifndef _UAPI_XE_DRM_H_
+#define _UAPI_XE_DRM_H_
#include "drm.h"
@@ -130,7 +130,7 @@ extern "C" {
* redefine the interface more easily than an ever growing struct of
* increasing complexity, and for large parts of that interface to be
* entirely optional. The downside is more pointer chasing; chasing across
- * the boundary with pointers encapsulated inside u64.
+ * the __user boundary with pointers encapsulated inside u64.
*
* Example chaining:
*
@@ -832,7 +832,6 @@ struct drm_xe_vm_destroy {
*
* and the @flags can be:
* - %DRM_XE_VM_BIND_FLAG_READONLY
- * - %DRM_XE_VM_BIND_FLAG_ASYNC
* - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the
* MAP operation immediately rather than deferring the MAP to the page
* fault handler.
@@ -931,6 +930,7 @@ struct drm_xe_vm_bind_op {
#define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0)
#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1)
#define DRM_XE_VM_BIND_FLAG_NULL (1 << 2)
+#define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3)
/** @flags: Bind flags */
__u32 flags;
@@ -1344,4 +1344,4 @@ struct drm_xe_wait_user_fence {
}
#endif
-#endif /* _XE_DRM_H_ */
+#endif /* _UAPI_XE_DRM_H_ */
--
2.40.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/2] tests/intel: Add xe_coredump test
2024-01-12 12:40 [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes Maarten Lankhorst
@ 2024-01-12 12:40 ` Maarten Lankhorst
2024-01-12 16:53 ` Kamil Konieczny
2024-01-12 13:09 ` [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes Francois Dugast
` (2 subsequent siblings)
3 siblings, 1 reply; 8+ messages in thread
From: Maarten Lankhorst @ 2024-01-12 12:40 UTC (permalink / raw)
To: igt-dev
Add a simple test that forces a GPU hang and then reads the resulting
devcoredump file. Map a single userptr and BO, and dump the contents of
those.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
tests/intel/xe_coredump.c | 188 ++++++++++++++++++++++++++++++++++++++
tests/meson.build | 1 +
2 files changed, 189 insertions(+)
create mode 100644 tests/intel/xe_coredump.c
diff --git a/tests/intel/xe_coredump.c b/tests/intel/xe_coredump.c
new file mode 100644
index 000000000..9db79bb1d
--- /dev/null
+++ b/tests/intel/xe_coredump.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+/**
+ * TEST: Check devcoredump functionality
+ * Category: Software building block
+ * Sub-category: devcoredump
+ * Test category: functionality test
+ * Run type: BAT
+ * Description: Test devcoredump functionality and dumping work as intended.
+ */
+
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <glob.h>
+#include <wchar.h>
+
+#include "igt.h"
+#include "igt_device.h"
+#include "igt_io.h"
+#include "igt_syncobj.h"
+#include "igt_sysfs.h"
+
+#include "intel_pat.h"
+
+#include "xe_drm.h"
+#include "xe/xe_ioctl.h"
+#include "xe/xe_query.h"
+
+static struct xe_device *xe;
+static uint32_t batch_bo;
+static uint32_t *batch;
+static void *userptr;
+static uint32_t vm;
+
+#define MAX_N_ENGINES 32
+
+/*
+ * Helper to read and clear devcore. We want to read it completely to ensure
+ * we catch any kernel side regressions like:
+ * https://gitlab.freedesktop.org/drm/msm/-/issues/20
+ */
+
+static void
+read_and_clear_devcore(bool match)
+{
+ glob_t glob_buf = {0};
+ int ret, i;
+
+ ret = glob("/sys/class/devcoredump/devcd*/data", GLOB_NOSORT, NULL, &glob_buf);
+ if ((ret == GLOB_NOMATCH) || !glob_buf.gl_pathc)
+ return;
+
+ for (i = 0; i < glob_buf.gl_pathc; i++) {
+ char buf[0x1000];
+ int fd = open(glob_buf.gl_pathv[i], O_RDWR);
+
+ if (fd == -1)
+ continue;
+
+ /*
+ * We want to read the entire file but we can throw away the
+ * contents.. we just want to make sure that we exercise the
+ * kernel side codepaths hit when reading the devcore from
+ * sysfs
+ */
+ igt_debug("---- begin coredump ----\n");
+ do {
+ memset(buf, 0, sizeof(buf));
+ ret = igt_readn(fd, buf, sizeof(buf) - 1);
+ igt_debug("%s", buf);
+ } while (ret > 0);
+
+ igt_debug("---- end coredump ----\n");
+
+ /* Clear the devcore: */
+ igt_writen(fd, "1", 1);
+
+ close(fd);
+ match = false;
+ }
+
+ globfree(&glob_buf);
+ igt_assert_f(!match, "No devcoredump found.\n");
+}
+
+/**
+ * SUBTEST: %s
+ * Description: Hang the GPU, and read out the resulting devcoredump.
+ * Test category: functionality test
+ */
+static void
+do_hang_test(bool all)
+{
+ uint32_t *ptr = batch;
+ uint64_t offset = xe->default_alignment - 4;
+ struct drm_xe_engine_class_instance *hwe;
+ uint32_t engines[MAX_N_ENGINES], syncobjs[MAX_N_ENGINES];
+ int i = 0;
+
+ memset(batch, 0, xe->default_alignment);
+ *(ptr++) = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_GTE_SDD;
+ *(ptr++) = 1;
+ *(ptr++) = offset >> 32;
+ *(ptr++) = offset;
+ *(ptr++) = MI_BATCH_BUFFER_END;
+
+ xe_for_each_engine(xe->fd, hwe) {
+ struct drm_xe_sync sync = {
+ .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
+ .flags = DRM_XE_SYNC_FLAG_SIGNAL,
+ };
+
+ sync.handle = syncobjs[i] = syncobj_create(xe->fd, 0);
+ engines[i] = xe_exec_queue_create(xe->fd, vm, hwe, 0);
+ xe_exec_sync(xe->fd, engines[i], 0, &sync, 1);
+ i++;
+ if (!all)
+ break;
+ }
+
+ /* Ensure all syncobjs are in a failed state */
+ while (i--) {
+ syncobj_wait(xe->fd, &syncobjs[i], 1, INT64_MAX, 0, NULL);
+ syncobj_destroy(xe->fd, syncobjs[i]);
+ }
+}
+
+igt_main
+{
+ igt_fixture {
+ struct drm_xe_sync sync = {
+ .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
+ .flags = DRM_XE_SYNC_FLAG_SIGNAL,
+ };
+ struct drm_xe_vm_bind_op bind_ops[2] = { };
+
+ int fd = drm_open_driver_render(DRIVER_XE);
+ xe = xe_device_get(fd);
+
+ /* Before test, ensure devcoredump is empty */
+ read_and_clear_devcore(false);
+
+ vm = xe_vm_create(fd, 0, 0);
+ batch_bo = xe_bo_create(fd, vm, xe->default_alignment, system_memory(fd), 0);
+ batch = xe_bo_map(xe->fd, batch_bo, xe->default_alignment);
+
+ userptr = mmap(0, xe->default_alignment, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+ wmemset(userptr, 0xf1234567, xe->default_alignment / sizeof(wchar_t));
+
+ bind_ops[0].op = DRM_XE_VM_BIND_OP_MAP;
+ bind_ops[0].obj = batch_bo;
+ bind_ops[0].addr = 0;
+
+ bind_ops[1].op = DRM_XE_VM_BIND_OP_MAP_USERPTR;
+ bind_ops[1].userptr = (size_t)userptr;
+ bind_ops[1].addr = 1ULL << 40ULL;
+
+ bind_ops[0].flags = bind_ops[1].flags = DRM_XE_VM_BIND_FLAG_DUMPABLE;
+ bind_ops[0].range = bind_ops[1].range = xe->default_alignment;
+ bind_ops[0].pat_index = bind_ops[1].pat_index = intel_get_pat_idx_wb(fd);
+
+ sync.handle = syncobj_create(fd, 0);
+ xe_vm_bind_array(fd, vm, 0, bind_ops, ARRAY_SIZE(bind_ops), &sync, 1);
+ syncobj_wait(fd, &sync.handle, 1, INT64_MAX, 0, NULL);
+ syncobj_destroy(fd, sync.handle);
+
+ }
+
+ igt_describe("Test that hw fault coredump readout works");
+ igt_subtest("basic") {
+ do_hang_test(false);
+
+ read_and_clear_devcore(true);
+ }
+
+ igt_describe("Hang all engines simultaneously");
+ igt_subtest("all-simultaneously") {
+ do_hang_test(true);
+
+ read_and_clear_devcore(true);
+ }
+}
+
diff --git a/tests/meson.build b/tests/meson.build
index a6a8498e2..cb19627e1 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -279,6 +279,7 @@ intel_xe_progs = [
'xe_create',
'xe_compute',
'xe_copy_basic',
+ 'xe_coredump',
'xe_dma_buf_sync',
'xe_debugfs',
'xe_drm_fdinfo',
--
2.40.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes.
2024-01-12 12:40 [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes Maarten Lankhorst
2024-01-12 12:40 ` [PATCH 2/2] tests/intel: Add xe_coredump test Maarten Lankhorst
@ 2024-01-12 13:09 ` Francois Dugast
2024-01-12 13:39 ` ✗ Fi.CI.BUILD: failure for series starting with [1/2] " Patchwork
2024-01-12 16:05 ` [PATCH 1/2] " Kamil Konieczny
3 siblings, 0 replies; 8+ messages in thread
From: Francois Dugast @ 2024-01-12 13:09 UTC (permalink / raw)
To: Maarten Lankhorst; +Cc: igt-dev
Hi Maarten,
On Fri, Jan 12, 2024 at 01:40:42PM +0100, Maarten Lankhorst wrote:
> Required for devcoredump test.
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
> include/drm-uapi/xe_drm.h | 10 +++++-----
> 1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/include/drm-uapi/xe_drm.h b/include/drm-uapi/xe_drm.h
> index bacdca787..0f975f57f 100644
> --- a/include/drm-uapi/xe_drm.h
> +++ b/include/drm-uapi/xe_drm.h
> @@ -3,8 +3,8 @@
> * Copyright © 2023 Intel Corporation
> */
>
> -#ifndef _XE_DRM_H_
> -#define _XE_DRM_H_
> +#ifndef _UAPI_XE_DRM_H_
> +#define _UAPI_XE_DRM_H_
This ^ and __user below seem to indicate the header was copied from
KMD sources instead of being generated, for example with:
$ make headers_install INSTALL_HDR_PATH=/tmp/headers
See https://www.kernel.org/doc/html/latest/kbuild/headers_install.html
Francois
>
> #include "drm.h"
>
> @@ -130,7 +130,7 @@ extern "C" {
> * redefine the interface more easily than an ever growing struct of
> * increasing complexity, and for large parts of that interface to be
> * entirely optional. The downside is more pointer chasing; chasing across
> - * the boundary with pointers encapsulated inside u64.
> + * the __user boundary with pointers encapsulated inside u64.
> *
> * Example chaining:
> *
> @@ -832,7 +832,6 @@ struct drm_xe_vm_destroy {
> *
> * and the @flags can be:
> * - %DRM_XE_VM_BIND_FLAG_READONLY
> - * - %DRM_XE_VM_BIND_FLAG_ASYNC
> * - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the
> * MAP operation immediately rather than deferring the MAP to the page
> * fault handler.
> @@ -931,6 +930,7 @@ struct drm_xe_vm_bind_op {
> #define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0)
> #define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1)
> #define DRM_XE_VM_BIND_FLAG_NULL (1 << 2)
> +#define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3)
> /** @flags: Bind flags */
> __u32 flags;
>
> @@ -1344,4 +1344,4 @@ struct drm_xe_wait_user_fence {
> }
> #endif
>
> -#endif /* _XE_DRM_H_ */
> +#endif /* _UAPI_XE_DRM_H_ */
> --
> 2.40.1
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* ✗ Fi.CI.BUILD: failure for series starting with [1/2] tests/intel: Sync headers to grab DUMPABLE changes.
2024-01-12 12:40 [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes Maarten Lankhorst
2024-01-12 12:40 ` [PATCH 2/2] tests/intel: Add xe_coredump test Maarten Lankhorst
2024-01-12 13:09 ` [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes Francois Dugast
@ 2024-01-12 13:39 ` Patchwork
2024-01-12 16:05 ` [PATCH 1/2] " Kamil Konieczny
3 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2024-01-12 13:39 UTC (permalink / raw)
To: Maarten Lankhorst; +Cc: igt-dev
== Series Details ==
Series: series starting with [1/2] tests/intel: Sync headers to grab DUMPABLE changes.
URL : https://patchwork.freedesktop.org/series/128716/
State : failure
== Summary ==
IGT patchset build failed on latest successful build
16d7484a28cc1aaf2dfb9e9c73c011daf602d926 tests/intel/xe_exec_store: don't pass gt_id as bind queue
Tail of build.log:
[1615/1664] Linking target tools/intel_panel_fitter.
[1616/1664] Linking target tools/intel_stepping.
[1617/1664] Linking target tools/intel_gem_info.
[1618/1664] Linking target tools/intel_firmware_decode.
[1619/1664] Linking target tools/intel_pm_rpm.
[1620/1664] Linking target tools/intel_watermark.
[1621/1664] Linking target tools/intel_gvtg_test.
[1622/1664] Linking target runner/testdata/abort-simple.
[1623/1664] Linking target tools/amd_hdmi_compliance.
[1624/1664] Generating gem_stress.testlist with a meson_exe.py custom command.
[1625/1664] Linking target runner/igt_comms_decoder.
[1626/1664] Linking target tools/msm_dp_compliance.
[1627/1664] Linking target tools/intel_l3_parity.
[1628/1664] Linking target runner/testdata/no-subtests.
[1629/1664] Linking target runner/testdata/abort.
[1630/1664] Linking target runner/testdata/skippers.
[1631/1664] Linking target tools/intel_reg.
[1632/1664] Linking target runner/testdata/abort-dynamic.
[1633/1664] Linking target runner/runner_json_test.
[1634/1664] Linking target runner/testdata/successtest.
[1635/1664] Linking target tools/lsgpu.
[1636/1664] Linking target runner/testdata/dynamic.
[1637/1664] Linking target runner/igt_runner.
[1638/1664] Linking target runner/igt_resume.
[1639/1664] Linking target runner/testdata/abort-fixture.
[1640/1664] Linking target runner/igt_results.
[1641/1664] Compiling C object 'lib/76b5a35@@i915_perf@sha/meson-generated_.._i915_perf_metrics_mtlgt3.c.o'.
[1642/1664] Compiling C object 'lib/76b5a35@@i915_perf@sha/meson-generated_.._i915_perf_metrics_acmgt1.c.o'.
[1643/1664] Compiling C object 'runner/527aa9f@@runner_test@exe/runner_tests.c.o'.
[1644/1664] Linking target runner/runner_test.
[1645/1664] Compiling C object 'lib/76b5a35@@i915_perf@sha/meson-generated_.._i915_perf_metrics_acmgt2.c.o'.
[1646/1664] Compiling C object 'lib/76b5a35@@i915_perf@sha/meson-generated_.._i915_perf_metrics_acmgt3.c.o'.
[1647/1664] Linking target lib/libi915_perf.so.1.5.
[1648/1664] Generating symbol file 'lib/76b5a35@@i915_perf@sha/libi915_perf.so.1.5.symbols'.
[1649/1664] Linking target tools/i915-perf/i915-perf-configs.
[1650/1664] Linking target tools/i915-perf/i915-perf-reader.
[1651/1664] Linking target tools/i915-perf/i915-perf-recorder.
[1652/1664] Linking target tests/gem_barrier_race.
[1653/1664] Linking target tests/core_hotunplug.
[1654/1664] Linking target tests/perf.
[1655/1664] Generating perf.testlist with a meson_exe.py custom command.
[1656/1664] Generating gem_barrier_race.testlist with a meson_exe.py custom command.
[1657/1664] Generating core_hotunplug.testlist with a meson_exe.py custom command.
[1658/1664] Generating xe_tests.rst with a custom command.
FAILED: docs/testplan/xe_tests.rst
/usr/src/igt-gpu-tools/scripts/igt_doc.py --config /usr/src/igt-gpu-tools/tests/intel/xe_test_config.json --rest docs/testplan/xe_tests.rst --check-testlist --igt-build-path /opt/igt/build
/usr/src/igt-gpu-tools/tests/intel/xe_coredump.c:subtest igt@xe_coredump@%s needs 1 arguments but only 0 are defined.
[1659/1664] Generating i915_tests.rst with a custom command.
[1660/1664] Generating kms_tests.rst with a custom command.
ninja: build stopped: subcommand failed.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes.
2024-01-12 12:40 [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes Maarten Lankhorst
` (2 preceding siblings ...)
2024-01-12 13:39 ` ✗ Fi.CI.BUILD: failure for series starting with [1/2] " Patchwork
@ 2024-01-12 16:05 ` Kamil Konieczny
3 siblings, 0 replies; 8+ messages in thread
From: Kamil Konieczny @ 2024-01-12 16:05 UTC (permalink / raw)
To: igt-dev
Hi Maarten,
On 2024-01-12 at 13:40:42 +0100, Maarten Lankhorst wrote:
> Required for devcoredump test.
please change subject and commit description to something like
in commit bc36d4f06ca0557371fda2e97361147d6ac4a3e6, so for subject:
drm-uapi/xe: Sync with DUMPABLE vm_bind flag
and for description cite commit from kernel (taken from above example):
Align with commit ("drm/xe/uapi: Remove sync binds")
If it was not accepted and you need it for testing, declare it in
test and send proper uapi/xe sync later.
Other way would be to introduce lib/xe/xe_drm_local.h like similar
lib/i915/i915_drm_local.h for i915.
+cc Francois
Regards,
Kamil
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
> include/drm-uapi/xe_drm.h | 10 +++++-----
> 1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/include/drm-uapi/xe_drm.h b/include/drm-uapi/xe_drm.h
> index bacdca787..0f975f57f 100644
> --- a/include/drm-uapi/xe_drm.h
> +++ b/include/drm-uapi/xe_drm.h
> @@ -3,8 +3,8 @@
> * Copyright © 2023 Intel Corporation
> */
>
> -#ifndef _XE_DRM_H_
> -#define _XE_DRM_H_
> +#ifndef _UAPI_XE_DRM_H_
> +#define _UAPI_XE_DRM_H_
>
> #include "drm.h"
>
> @@ -130,7 +130,7 @@ extern "C" {
> * redefine the interface more easily than an ever growing struct of
> * increasing complexity, and for large parts of that interface to be
> * entirely optional. The downside is more pointer chasing; chasing across
> - * the boundary with pointers encapsulated inside u64.
> + * the __user boundary with pointers encapsulated inside u64.
> *
> * Example chaining:
> *
> @@ -832,7 +832,6 @@ struct drm_xe_vm_destroy {
> *
> * and the @flags can be:
> * - %DRM_XE_VM_BIND_FLAG_READONLY
> - * - %DRM_XE_VM_BIND_FLAG_ASYNC
> * - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the
> * MAP operation immediately rather than deferring the MAP to the page
> * fault handler.
> @@ -931,6 +930,7 @@ struct drm_xe_vm_bind_op {
> #define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0)
> #define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1)
> #define DRM_XE_VM_BIND_FLAG_NULL (1 << 2)
> +#define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3)
> /** @flags: Bind flags */
> __u32 flags;
>
> @@ -1344,4 +1344,4 @@ struct drm_xe_wait_user_fence {
> }
> #endif
>
> -#endif /* _XE_DRM_H_ */
> +#endif /* _UAPI_XE_DRM_H_ */
> --
> 2.40.1
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] tests/intel: Add xe_coredump test
2024-01-12 12:40 ` [PATCH 2/2] tests/intel: Add xe_coredump test Maarten Lankhorst
@ 2024-01-12 16:53 ` Kamil Konieczny
2024-01-15 12:34 ` Maarten Lankhorst
0 siblings, 1 reply; 8+ messages in thread
From: Kamil Konieczny @ 2024-01-12 16:53 UTC (permalink / raw)
To: igt-dev
Hi Maarten,
On 2024-01-12 at 13:40:43 +0100, Maarten Lankhorst wrote:
> Add a simple test that forces a GPU hang and then reads the resulting
> devcoredump file. Map a single userptr and BO, and dump the contents of
> those.
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
> tests/intel/xe_coredump.c | 188 ++++++++++++++++++++++++++++++++++++++
> tests/meson.build | 1 +
> 2 files changed, 189 insertions(+)
> create mode 100644 tests/intel/xe_coredump.c
>
> diff --git a/tests/intel/xe_coredump.c b/tests/intel/xe_coredump.c
> new file mode 100644
> index 000000000..9db79bb1d
> --- /dev/null
> +++ b/tests/intel/xe_coredump.c
> @@ -0,0 +1,188 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2023 Intel Corporation
> + */
> +
> +/**
> + * TEST: Check devcoredump functionality
> + * Category: Software building block
> + * Sub-category: devcoredump
> + * Test category: functionality test
> + * Run type: BAT
> + * Description: Test devcoredump functionality and dumping work as intended.
Document also new subtest(s) here.
> + */
> +
> +#include <string.h>
> +#include <unistd.h>
> +#include <fcntl.h>
> +#include <sys/stat.h>
> +#include <glob.h>
> +#include <wchar.h>
It should be sorted alphabetically but unistd.h is somewhat
special and should be first.
> +
> +#include "igt.h"
> +#include "igt_device.h"
> +#include "igt_io.h"
> +#include "igt_syncobj.h"
> +#include "igt_sysfs.h"
> +
> +#include "intel_pat.h"
> +
> +#include "xe_drm.h"
> +#include "xe/xe_ioctl.h"
> +#include "xe/xe_query.h"
> +
> +static struct xe_device *xe;
> +static uint32_t batch_bo;
> +static uint32_t *batch;
> +static void *userptr;
> +static uint32_t vm;
> +
> +#define MAX_N_ENGINES 32
> +
> +/*
> + * Helper to read and clear devcore. We want to read it completely to ensure
> + * we catch any kernel side regressions like:
> + * https://gitlab.freedesktop.org/drm/msm/-/issues/20
> + */
> +
> +static void
> +read_and_clear_devcore(bool match)
--------------------------^^^^
Could you get rid of bool param? It is better to write two
separate functions, one for clearing and one for dumping.
> +{
> + glob_t glob_buf = {0};
> + int ret, i;
> +
> + ret = glob("/sys/class/devcoredump/devcd*/data", GLOB_NOSORT, NULL, &glob_buf);
> + if ((ret == GLOB_NOMATCH) || !glob_buf.gl_pathc)
> + return;
> +
> + for (i = 0; i < glob_buf.gl_pathc; i++) {
> + char buf[0x1000];
> + int fd = open(glob_buf.gl_pathv[i], O_RDWR);
> +
> + if (fd == -1)
> + continue;
> +
> + /*
> + * We want to read the entire file but we can throw away the
> + * contents.. we just want to make sure that we exercise the
> + * kernel side codepaths hit when reading the devcore from
> + * sysfs
> + */
> + igt_debug("---- begin coredump ----\n");
> + do {
> + memset(buf, 0, sizeof(buf));
----------- ^
It is better to do it once before loop.
> + ret = igt_readn(fd, buf, sizeof(buf) - 1);
> + igt_debug("%s", buf);
Consider limiting it to some reasonable size to not clobber CI.
If you want you could add option to test to write it all.
> + } while (ret > 0);
> +
> + igt_debug("---- end coredump ----\n");
> +
> + /* Clear the devcore: */
> + igt_writen(fd, "1", 1);
Is it enough to write "1" to devcore to clear it? Or should it
be read out to get rid off the data?
> +
> + close(fd);
> + match = false;
Better do it once, not in loop.
> + }
> +
match = glob_buf.gl_pathc;
and later
igt_assert_f(match > 0, "No devcoredump found.\n");
> + globfree(&glob_buf);
> + igt_assert_f(!match, "No devcoredump found.\n");
> +}
> +
> +/**
> + * SUBTEST: %s
-------------- ^
When using param you should later give its values but here it is
overcomplicating things, just name it:
* SUBTEST: basic
> + * Description: Hang the GPU, and read out the resulting devcoredump.
-------------------------------^
No need for comma "," before "and".
Write in description what is the difference in second test.
> + * Test category: functionality test
Put here description for second test:
* SUBTEST: all-simultaneously
with other fields filled.
> + */
> +static void
> +do_hang_test(bool all)
> +{
> + uint32_t *ptr = batch;
> + uint64_t offset = xe->default_alignment - 4;
> + struct drm_xe_engine_class_instance *hwe;
> + uint32_t engines[MAX_N_ENGINES], syncobjs[MAX_N_ENGINES];
> + int i = 0;
> +
> + memset(batch, 0, xe->default_alignment);
> + *(ptr++) = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_GTE_SDD;
> + *(ptr++) = 1;
> + *(ptr++) = offset >> 32;
> + *(ptr++) = offset;
> + *(ptr++) = MI_BATCH_BUFFER_END;
> +
> + xe_for_each_engine(xe->fd, hwe) {
> + struct drm_xe_sync sync = {
> + .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
> + .flags = DRM_XE_SYNC_FLAG_SIGNAL,
> + };
> +
> + sync.handle = syncobjs[i] = syncobj_create(xe->fd, 0);
> + engines[i] = xe_exec_queue_create(xe->fd, vm, hwe, 0);
> + xe_exec_sync(xe->fd, engines[i], 0, &sync, 1);
> + i++;
> + if (!all)
> + break;
> + }
> +
> + /* Ensure all syncobjs are in a failed state */
> + while (i--) {
> + syncobj_wait(xe->fd, &syncobjs[i], 1, INT64_MAX, 0, NULL);
> + syncobj_destroy(xe->fd, syncobjs[i]);
> + }
> +}
> +
> +igt_main
> +{
> + igt_fixture {
> + struct drm_xe_sync sync = {
> + .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
> + .flags = DRM_XE_SYNC_FLAG_SIGNAL,
> + };
> + struct drm_xe_vm_bind_op bind_ops[2] = { };
> +
> + int fd = drm_open_driver_render(DRIVER_XE);
> + xe = xe_device_get(fd);
> +
> + /* Before test, ensure devcoredump is empty */
> + read_and_clear_devcore(false);
Better:
clear_devcore();
> +
> + vm = xe_vm_create(fd, 0, 0);
> + batch_bo = xe_bo_create(fd, vm, xe->default_alignment, system_memory(fd), 0);
> + batch = xe_bo_map(xe->fd, batch_bo, xe->default_alignment);
> +
> + userptr = mmap(0, xe->default_alignment, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
> + wmemset(userptr, 0xf1234567, xe->default_alignment / sizeof(wchar_t));
> +
> + bind_ops[0].op = DRM_XE_VM_BIND_OP_MAP;
> + bind_ops[0].obj = batch_bo;
> + bind_ops[0].addr = 0;
> +
> + bind_ops[1].op = DRM_XE_VM_BIND_OP_MAP_USERPTR;
> + bind_ops[1].userptr = (size_t)userptr;
> + bind_ops[1].addr = 1ULL << 40ULL;
> +
> + bind_ops[0].flags = bind_ops[1].flags = DRM_XE_VM_BIND_FLAG_DUMPABLE;
> + bind_ops[0].range = bind_ops[1].range = xe->default_alignment;
> + bind_ops[0].pat_index = bind_ops[1].pat_index = intel_get_pat_idx_wb(fd);
> +
> + sync.handle = syncobj_create(fd, 0);
> + xe_vm_bind_array(fd, vm, 0, bind_ops, ARRAY_SIZE(bind_ops), &sync, 1);
> + syncobj_wait(fd, &sync.handle, 1, INT64_MAX, 0, NULL);
> + syncobj_destroy(fd, sync.handle);
> +
> + }
> +
> + igt_describe("Test that hw fault coredump readout works");
> + igt_subtest("basic") {
> + do_hang_test(false);
> +
> + read_and_clear_devcore(true);
Better:
read_and_clear_devcore();
> + }
> +
> + igt_describe("Hang all engines simultaneously");
> + igt_subtest("all-simultaneously") {
> + do_hang_test(true);
> +
> + read_and_clear_devcore(true);
Same here:
read_and_clear_devcore();
Regards,
Kamil
> + }
> +}
> +
> diff --git a/tests/meson.build b/tests/meson.build
> index a6a8498e2..cb19627e1 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -279,6 +279,7 @@ intel_xe_progs = [
> 'xe_create',
> 'xe_compute',
> 'xe_copy_basic',
> + 'xe_coredump',
> 'xe_dma_buf_sync',
> 'xe_debugfs',
> 'xe_drm_fdinfo',
> --
> 2.40.1
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] tests/intel: Add xe_coredump test
2024-01-12 16:53 ` Kamil Konieczny
@ 2024-01-15 12:34 ` Maarten Lankhorst
2024-01-15 15:47 ` Kamil Konieczny
0 siblings, 1 reply; 8+ messages in thread
From: Maarten Lankhorst @ 2024-01-15 12:34 UTC (permalink / raw)
To: Kamil Konieczny, igt-dev
Hey,
Den 2024-01-12 kl. 17:53, skrev Kamil Konieczny:
> Hi Maarten,
> On 2024-01-12 at 13:40:43 +0100, Maarten Lankhorst wrote:
>> Add a simple test that forces a GPU hang and then reads the resulting
>> devcoredump file. Map a single userptr and BO, and dump the contents of
>> those.
>>
>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>> ---
>> tests/intel/xe_coredump.c | 188 ++++++++++++++++++++++++++++++++++++++
>> tests/meson.build | 1 +
>> 2 files changed, 189 insertions(+)
>> create mode 100644 tests/intel/xe_coredump.c
>>
>> diff --git a/tests/intel/xe_coredump.c b/tests/intel/xe_coredump.c
>> new file mode 100644
>> index 000000000..9db79bb1d
>> --- /dev/null
>> +++ b/tests/intel/xe_coredump.c
>> @@ -0,0 +1,188 @@
>> +// SPDX-License-Identifier: MIT
>> +/*
>> + * Copyright © 2023 Intel Corporation
>> + */
>> +
>> +/**
>> + * TEST: Check devcoredump functionality
>> + * Category: Software building block
>> + * Sub-category: devcoredump
>> + * Test category: functionality test
>> + * Run type: BAT
>> + * Description: Test devcoredump functionality and dumping work as intended.
> Document also new subtest(s) here.
>
>> + */
>> +
>> +#include <string.h>
>> +#include <unistd.h>
>> +#include <fcntl.h>
>> +#include <sys/stat.h>
>> +#include <glob.h>
>> +#include <wchar.h>
> It should be sorted alphabetically but unistd.h is somewhat
> special and should be first.
Okay.
>
>> +
>> +#include "igt.h"
>> +#include "igt_device.h"
>> +#include "igt_io.h"
>> +#include "igt_syncobj.h"
>> +#include "igt_sysfs.h"
>> +
>> +#include "intel_pat.h"
>> +
>> +#include "xe_drm.h"
>> +#include "xe/xe_ioctl.h"
>> +#include "xe/xe_query.h"
>> +
>> +static struct xe_device *xe;
>> +static uint32_t batch_bo;
>> +static uint32_t *batch;
>> +static void *userptr;
>> +static uint32_t vm;
>> +
>> +#define MAX_N_ENGINES 32
>> +
>> +/*
>> + * Helper to read and clear devcore. We want to read it completely to ensure
>> + * we catch any kernel side regressions like:
>> + * https://gitlab.freedesktop.org/drm/msm/-/issues/20
>> + */
>> +
>> +static void
>> +read_and_clear_devcore(bool match)
> --------------------------^^^^
>
> Could you get rid of bool param? It is better to write two
> separate functions, one for clearing and one for dumping.
This function does not really care what the contents of the coredump
are, just that it can be read without a kernel panic.
>> +{
>> + glob_t glob_buf = {0};
>> + int ret, i;
>> +
>> + ret = glob("/sys/class/devcoredump/devcd*/data", GLOB_NOSORT, NULL, &glob_buf);
>> + if ((ret == GLOB_NOMATCH) || !glob_buf.gl_pathc)
>> + return;
>> +
>> + for (i = 0; i < glob_buf.gl_pathc; i++) {
>> + char buf[0x1000];
>> + int fd = open(glob_buf.gl_pathv[i], O_RDWR);
>> +
>> + if (fd == -1)
>> + continue;
>> +
>> + /*
>> + * We want to read the entire file but we can throw away the
>> + * contents.. we just want to make sure that we exercise the
>> + * kernel side codepaths hit when reading the devcore from
>> + * sysfs
>> + */
>> + igt_debug("---- begin coredump ----\n");
>> + do {
>> + memset(buf, 0, sizeof(buf));
> ----------- ^
> It is better to do it once before loop.
Yeah, except there may be multiple devcoredump capable devices, which is
why we clear all at the beginning of the test.
I should check the device path instead.
>
>> + ret = igt_readn(fd, buf, sizeof(buf) - 1);
>> + igt_debug("%s", buf);
> Consider limiting it to some reasonable size to not clobber CI.
> If you want you could add option to test to write it all.
>
>> + } while (ret > 0);
>> +
>> + igt_debug("---- end coredump ----\n");
>> +
>> + /* Clear the devcore: */
>> + igt_writen(fd, "1", 1);
> Is it enough to write "1" to devcore to clear it? Or should it
> be read out to get rid off the data?
The readout is to ensure the actual dumping part works succesfully, or
at least does not cause a kernel panic.
>> +
>> + close(fd);
>> + match = false;
> Better do it once, not in loop.
>
>> + }
>> +
> match = glob_buf.gl_pathc;
> and later
> igt_assert_f(match > 0, "No devcoredump found.\n");
>
>> + globfree(&glob_buf);
>> + igt_assert_f(!match, "No devcoredump found.\n");
>> +}
>> +
>> +/**
>> + * SUBTEST: %s
> -------------- ^
> When using param you should later give its values but here it is
> overcomplicating things, just name it:
>
> * SUBTEST: basic
>> + * Description: Hang the GPU, and read out the resulting devcoredump.
> -------------------------------^
> No need for comma "," before "and".
> Write in description what is the difference in second test.
>
>> + * Test category: functionality test
> Put here description for second test:
>
> * SUBTEST: all-simultaneously
>
> with other fields filled.
Yeah, wasn't sure how to add same description for 2 tests.
>> + */
>> +static void
>> +do_hang_test(bool all)
>> +{
>> + uint32_t *ptr = batch;
>> + uint64_t offset = xe->default_alignment - 4;
>> + struct drm_xe_engine_class_instance *hwe;
>> + uint32_t engines[MAX_N_ENGINES], syncobjs[MAX_N_ENGINES];
>> + int i = 0;
>> +
>> + memset(batch, 0, xe->default_alignment);
>> + *(ptr++) = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_GTE_SDD;
>> + *(ptr++) = 1;
>> + *(ptr++) = offset >> 32;
>> + *(ptr++) = offset;
>> + *(ptr++) = MI_BATCH_BUFFER_END;
>> +
>> + xe_for_each_engine(xe->fd, hwe) {
>> + struct drm_xe_sync sync = {
>> + .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
>> + .flags = DRM_XE_SYNC_FLAG_SIGNAL,
>> + };
>> +
>> + sync.handle = syncobjs[i] = syncobj_create(xe->fd, 0);
>> + engines[i] = xe_exec_queue_create(xe->fd, vm, hwe, 0);
>> + xe_exec_sync(xe->fd, engines[i], 0, &sync, 1);
>> + i++;
>> + if (!all)
>> + break;
>> + }
>> +
>> + /* Ensure all syncobjs are in a failed state */
>> + while (i--) {
>> + syncobj_wait(xe->fd, &syncobjs[i], 1, INT64_MAX, 0, NULL);
>> + syncobj_destroy(xe->fd, syncobjs[i]);
>> + }
>> +}
>> +
>> +igt_main
>> +{
>> + igt_fixture {
>> + struct drm_xe_sync sync = {
>> + .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
>> + .flags = DRM_XE_SYNC_FLAG_SIGNAL,
>> + };
>> + struct drm_xe_vm_bind_op bind_ops[2] = { };
>> +
>> + int fd = drm_open_driver_render(DRIVER_XE);
>> + xe = xe_device_get(fd);
>> +
>> + /* Before test, ensure devcoredump is empty */
>> + read_and_clear_devcore(false);
> Better:
> clear_devcore();
>
>> +
>> + vm = xe_vm_create(fd, 0, 0);
>> + batch_bo = xe_bo_create(fd, vm, xe->default_alignment, system_memory(fd), 0);
>> + batch = xe_bo_map(xe->fd, batch_bo, xe->default_alignment);
>> +
>> + userptr = mmap(0, xe->default_alignment, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
>> + wmemset(userptr, 0xf1234567, xe->default_alignment / sizeof(wchar_t));
>> +
>> + bind_ops[0].op = DRM_XE_VM_BIND_OP_MAP;
>> + bind_ops[0].obj = batch_bo;
>> + bind_ops[0].addr = 0;
>> +
>> + bind_ops[1].op = DRM_XE_VM_BIND_OP_MAP_USERPTR;
>> + bind_ops[1].userptr = (size_t)userptr;
>> + bind_ops[1].addr = 1ULL << 40ULL;
>> +
>> + bind_ops[0].flags = bind_ops[1].flags = DRM_XE_VM_BIND_FLAG_DUMPABLE;
>> + bind_ops[0].range = bind_ops[1].range = xe->default_alignment;
>> + bind_ops[0].pat_index = bind_ops[1].pat_index = intel_get_pat_idx_wb(fd);
>> +
>> + sync.handle = syncobj_create(fd, 0);
>> + xe_vm_bind_array(fd, vm, 0, bind_ops, ARRAY_SIZE(bind_ops), &sync, 1);
>> + syncobj_wait(fd, &sync.handle, 1, INT64_MAX, 0, NULL);
>> + syncobj_destroy(fd, sync.handle);
>> +
>> + }
>> +
>> + igt_describe("Test that hw fault coredump readout works");
>> + igt_subtest("basic") {
>> + do_hang_test(false);
>> +
>> + read_and_clear_devcore(true);
> Better:
> read_and_clear_devcore();
>
>> + }
>> +
>> + igt_describe("Hang all engines simultaneously");
>> + igt_subtest("all-simultaneously") {
>> + do_hang_test(true);
>> +
>> + read_and_clear_devcore(true);
> Same here:
> read_and_clear_devcore();
Thanks, I'll try to clear up the testcase.
Cheers,
Maarten
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] tests/intel: Add xe_coredump test
2024-01-15 12:34 ` Maarten Lankhorst
@ 2024-01-15 15:47 ` Kamil Konieczny
0 siblings, 0 replies; 8+ messages in thread
From: Kamil Konieczny @ 2024-01-15 15:47 UTC (permalink / raw)
To: igt-dev
Hi Maarten,
On 2024-01-15 at 13:34:33 +0100, Maarten Lankhorst wrote:
> Hey,
>
> Den 2024-01-12 kl. 17:53, skrev Kamil Konieczny:
> > Hi Maarten,
> > On 2024-01-12 at 13:40:43 +0100, Maarten Lankhorst wrote:
> > > Add a simple test that forces a GPU hang and then reads the resulting
> > > devcoredump file. Map a single userptr and BO, and dump the contents of
> > > those.
> > >
> > > Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> > > ---
> > > tests/intel/xe_coredump.c | 188 ++++++++++++++++++++++++++++++++++++++
> > > tests/meson.build | 1 +
> > > 2 files changed, 189 insertions(+)
> > > create mode 100644 tests/intel/xe_coredump.c
> > >
> > > diff --git a/tests/intel/xe_coredump.c b/tests/intel/xe_coredump.c
> > > new file mode 100644
> > > index 000000000..9db79bb1d
> > > --- /dev/null
> > > +++ b/tests/intel/xe_coredump.c
> > > @@ -0,0 +1,188 @@
> > > +// SPDX-License-Identifier: MIT
> > > +/*
> > > + * Copyright © 2023 Intel Corporation
> > > + */
> > > +
> > > +/**
> > > + * TEST: Check devcoredump functionality
> > > + * Category: Software building block
> > > + * Sub-category: devcoredump
> > > + * Test category: functionality test
> > > + * Run type: BAT
> > > + * Description: Test devcoredump functionality and dumping work as intended.
> > Document also new subtest(s) here.
> >
> > > + */
> > > +
> > > +#include <string.h>
> > > +#include <unistd.h>
> > > +#include <fcntl.h>
> > > +#include <sys/stat.h>
> > > +#include <glob.h>
> > > +#include <wchar.h>
> > It should be sorted alphabetically but unistd.h is somewhat
> > special and should be first.
> Okay.
> >
> > > +
> > > +#include "igt.h"
> > > +#include "igt_device.h"
> > > +#include "igt_io.h"
> > > +#include "igt_syncobj.h"
> > > +#include "igt_sysfs.h"
> > > +
> > > +#include "intel_pat.h"
> > > +
> > > +#include "xe_drm.h"
> > > +#include "xe/xe_ioctl.h"
> > > +#include "xe/xe_query.h"
> > > +
> > > +static struct xe_device *xe;
> > > +static uint32_t batch_bo;
> > > +static uint32_t *batch;
> > > +static void *userptr;
> > > +static uint32_t vm;
> > > +
> > > +#define MAX_N_ENGINES 32
> > > +
> > > +/*
> > > + * Helper to read and clear devcore. We want to read it completely to ensure
> > > + * we catch any kernel side regressions like:
> > > + * https://gitlab.freedesktop.org/drm/msm/-/issues/20
> > > + */
> > > +
> > > +static void
> > > +read_and_clear_devcore(bool match)
> > --------------------------^^^^
> >
> > Could you get rid of bool param? It is better to write two
> > separate functions, one for clearing and one for dumping.
> This function does not really care what the contents of the coredump are,
> just that it can be read without a kernel panic.
Ok, but imho it should read it after clearing:
preparations:
clear all devcores
test:
casue hang
read devcore
What I was talking about was that preparation step, no need to
read it out in that step. For that you could write clear_devcore()
with only clearing it with the fast method and no igt_asserts() there.
If you prefere you could debug print which devcores you cleared
(e.g. which one were not empty before executing your test).
> > > +{
> > > + glob_t glob_buf = {0};
> > > + int ret, i;
> > > +
> > > + ret = glob("/sys/class/devcoredump/devcd*/data", GLOB_NOSORT, NULL, &glob_buf);
> > > + if ((ret == GLOB_NOMATCH) || !glob_buf.gl_pathc)
> > > + return;
> > > +
> > > + for (i = 0; i < glob_buf.gl_pathc; i++) {
> > > + char buf[0x1000];
> > > + int fd = open(glob_buf.gl_pathv[i], O_RDWR);
> > > +
> > > + if (fd == -1)
> > > + continue;
> > > +
> > > + /*
> > > + * We want to read the entire file but we can throw away the
> > > + * contents.. we just want to make sure that we exercise the
> > > + * kernel side codepaths hit when reading the devcore from
> > > + * sysfs
> > > + */
> > > + igt_debug("---- begin coredump ----\n");
> > > + do {
> > > + memset(buf, 0, sizeof(buf));
> > ----------- ^
> > It is better to do it once before loop.
>
> Yeah, except there may be multiple devcoredump capable devices, which is why
> we clear all at the beginning of the test.
Here I was talking about memset before reading into buf, it seems I should
look into igt_readn. Simplify it into:
do { /* read devcore and write it with igt_debug() */
ret = igt_readn(fd, buf, sizeof(buf) - 2);
if (ret <= 0)
break;
buf[ret + 1] = 0;
igt_debug("%s", buf);
} while(true);
Regards,
Kamil
>
> I should check the device path instead.
>
> >
> > > + ret = igt_readn(fd, buf, sizeof(buf) - 1);
> > > + igt_debug("%s", buf);
> > Consider limiting it to some reasonable size to not clobber CI.
> > If you want you could add option to test to write it all.
> >
> > > + } while (ret > 0);
> > > +
> > > + igt_debug("---- end coredump ----\n");
> > > +
> > > + /* Clear the devcore: */
> > > + igt_writen(fd, "1", 1);
> > Is it enough to write "1" to devcore to clear it? Or should it
> > be read out to get rid off the data?
> The readout is to ensure the actual dumping part works succesfully, or at
> least does not cause a kernel panic.
> > > +
> > > + close(fd);
> > > + match = false;
> > Better do it once, not in loop.
> >
> > > + }
> > > +
> > match = glob_buf.gl_pathc;
> > and later
> > igt_assert_f(match > 0, "No devcoredump found.\n");
> >
> > > + globfree(&glob_buf);
> > > + igt_assert_f(!match, "No devcoredump found.\n");
> > > +}
> > > +
> > > +/**
> > > + * SUBTEST: %s
> > -------------- ^
> > When using param you should later give its values but here it is
> > overcomplicating things, just name it:
> >
> > * SUBTEST: basic
> > > + * Description: Hang the GPU, and read out the resulting devcoredump.
> > -------------------------------^
> > No need for comma "," before "and".
> > Write in description what is the difference in second test.
> >
> > > + * Test category: functionality test
> > Put here description for second test:
> >
> > * SUBTEST: all-simultaneously
> >
> > with other fields filled.
> Yeah, wasn't sure how to add same description for 2 tests.
> > > + */
> > > +static void
> > > +do_hang_test(bool all)
> > > +{
> > > + uint32_t *ptr = batch;
> > > + uint64_t offset = xe->default_alignment - 4;
> > > + struct drm_xe_engine_class_instance *hwe;
> > > + uint32_t engines[MAX_N_ENGINES], syncobjs[MAX_N_ENGINES];
> > > + int i = 0;
> > > +
> > > + memset(batch, 0, xe->default_alignment);
> > > + *(ptr++) = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_GTE_SDD;
> > > + *(ptr++) = 1;
> > > + *(ptr++) = offset >> 32;
> > > + *(ptr++) = offset;
> > > + *(ptr++) = MI_BATCH_BUFFER_END;
> > > +
> > > + xe_for_each_engine(xe->fd, hwe) {
> > > + struct drm_xe_sync sync = {
> > > + .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
> > > + .flags = DRM_XE_SYNC_FLAG_SIGNAL,
> > > + };
> > > +
> > > + sync.handle = syncobjs[i] = syncobj_create(xe->fd, 0);
> > > + engines[i] = xe_exec_queue_create(xe->fd, vm, hwe, 0);
> > > + xe_exec_sync(xe->fd, engines[i], 0, &sync, 1);
> > > + i++;
> > > + if (!all)
> > > + break;
> > > + }
> > > +
> > > + /* Ensure all syncobjs are in a failed state */
> > > + while (i--) {
> > > + syncobj_wait(xe->fd, &syncobjs[i], 1, INT64_MAX, 0, NULL);
> > > + syncobj_destroy(xe->fd, syncobjs[i]);
> > > + }
> > > +}
> > > +
> > > +igt_main
> > > +{
> > > + igt_fixture {
> > > + struct drm_xe_sync sync = {
> > > + .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
> > > + .flags = DRM_XE_SYNC_FLAG_SIGNAL,
> > > + };
> > > + struct drm_xe_vm_bind_op bind_ops[2] = { };
> > > +
> > > + int fd = drm_open_driver_render(DRIVER_XE);
> > > + xe = xe_device_get(fd);
> > > +
> > > + /* Before test, ensure devcoredump is empty */
> > > + read_and_clear_devcore(false);
> > Better:
> > clear_devcore();
> >
> > > +
> > > + vm = xe_vm_create(fd, 0, 0);
> > > + batch_bo = xe_bo_create(fd, vm, xe->default_alignment, system_memory(fd), 0);
> > > + batch = xe_bo_map(xe->fd, batch_bo, xe->default_alignment);
> > > +
> > > + userptr = mmap(0, xe->default_alignment, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
> > > + wmemset(userptr, 0xf1234567, xe->default_alignment / sizeof(wchar_t));
> > > +
> > > + bind_ops[0].op = DRM_XE_VM_BIND_OP_MAP;
> > > + bind_ops[0].obj = batch_bo;
> > > + bind_ops[0].addr = 0;
> > > +
> > > + bind_ops[1].op = DRM_XE_VM_BIND_OP_MAP_USERPTR;
> > > + bind_ops[1].userptr = (size_t)userptr;
> > > + bind_ops[1].addr = 1ULL << 40ULL;
> > > +
> > > + bind_ops[0].flags = bind_ops[1].flags = DRM_XE_VM_BIND_FLAG_DUMPABLE;
> > > + bind_ops[0].range = bind_ops[1].range = xe->default_alignment;
> > > + bind_ops[0].pat_index = bind_ops[1].pat_index = intel_get_pat_idx_wb(fd);
> > > +
> > > + sync.handle = syncobj_create(fd, 0);
> > > + xe_vm_bind_array(fd, vm, 0, bind_ops, ARRAY_SIZE(bind_ops), &sync, 1);
> > > + syncobj_wait(fd, &sync.handle, 1, INT64_MAX, 0, NULL);
> > > + syncobj_destroy(fd, sync.handle);
> > > +
> > > + }
> > > +
> > > + igt_describe("Test that hw fault coredump readout works");
> > > + igt_subtest("basic") {
> > > + do_hang_test(false);
> > > +
> > > + read_and_clear_devcore(true);
> > Better:
> > read_and_clear_devcore();
> >
> > > + }
> > > +
> > > + igt_describe("Hang all engines simultaneously");
> > > + igt_subtest("all-simultaneously") {
> > > + do_hang_test(true);
> > > +
> > > + read_and_clear_devcore(true);
> > Same here:
> > read_and_clear_devcore();
>
> Thanks, I'll try to clear up the testcase.
>
> Cheers,
>
> Maarten
>
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2024-01-15 15:47 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-01-12 12:40 [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes Maarten Lankhorst
2024-01-12 12:40 ` [PATCH 2/2] tests/intel: Add xe_coredump test Maarten Lankhorst
2024-01-12 16:53 ` Kamil Konieczny
2024-01-15 12:34 ` Maarten Lankhorst
2024-01-15 15:47 ` Kamil Konieczny
2024-01-12 13:09 ` [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes Francois Dugast
2024-01-12 13:39 ` ✗ Fi.CI.BUILD: failure for series starting with [1/2] " Patchwork
2024-01-12 16:05 ` [PATCH 1/2] " Kamil Konieczny
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox