Igt-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes.
@ 2024-01-12 12:40 Maarten Lankhorst
  2024-01-12 12:40 ` [PATCH 2/2] tests/intel: Add xe_coredump test Maarten Lankhorst
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Maarten Lankhorst @ 2024-01-12 12:40 UTC (permalink / raw)
  To: igt-dev

Required for devcoredump test.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 include/drm-uapi/xe_drm.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/drm-uapi/xe_drm.h b/include/drm-uapi/xe_drm.h
index bacdca787..0f975f57f 100644
--- a/include/drm-uapi/xe_drm.h
+++ b/include/drm-uapi/xe_drm.h
@@ -3,8 +3,8 @@
  * Copyright © 2023 Intel Corporation
  */
 
-#ifndef _XE_DRM_H_
-#define _XE_DRM_H_
+#ifndef _UAPI_XE_DRM_H_
+#define _UAPI_XE_DRM_H_
 
 #include "drm.h"
 
@@ -130,7 +130,7 @@ extern "C" {
  * redefine the interface more easily than an ever growing struct of
  * increasing complexity, and for large parts of that interface to be
  * entirely optional. The downside is more pointer chasing; chasing across
- * the boundary with pointers encapsulated inside u64.
+ * the __user boundary with pointers encapsulated inside u64.
  *
  * Example chaining:
  *
@@ -832,7 +832,6 @@ struct drm_xe_vm_destroy {
  *
  * and the @flags can be:
  *  - %DRM_XE_VM_BIND_FLAG_READONLY
- *  - %DRM_XE_VM_BIND_FLAG_ASYNC
  *  - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the
  *    MAP operation immediately rather than deferring the MAP to the page
  *    fault handler.
@@ -931,6 +930,7 @@ struct drm_xe_vm_bind_op {
 #define DRM_XE_VM_BIND_FLAG_READONLY	(1 << 0)
 #define DRM_XE_VM_BIND_FLAG_IMMEDIATE	(1 << 1)
 #define DRM_XE_VM_BIND_FLAG_NULL	(1 << 2)
+#define DRM_XE_VM_BIND_FLAG_DUMPABLE	(1 << 3)
 	/** @flags: Bind flags */
 	__u32 flags;
 
@@ -1344,4 +1344,4 @@ struct drm_xe_wait_user_fence {
 }
 #endif
 
-#endif /* _XE_DRM_H_ */
+#endif /* _UAPI_XE_DRM_H_ */
-- 
2.40.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] tests/intel: Add xe_coredump test
  2024-01-12 12:40 [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes Maarten Lankhorst
@ 2024-01-12 12:40 ` Maarten Lankhorst
  2024-01-12 16:53   ` Kamil Konieczny
  2024-01-12 13:09 ` [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes Francois Dugast
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 8+ messages in thread
From: Maarten Lankhorst @ 2024-01-12 12:40 UTC (permalink / raw)
  To: igt-dev

Add a simple test that forces a GPU hang and then reads the resulting
devcoredump file. Map a single userptr and BO, and dump the contents of
those.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 tests/intel/xe_coredump.c | 188 ++++++++++++++++++++++++++++++++++++++
 tests/meson.build         |   1 +
 2 files changed, 189 insertions(+)
 create mode 100644 tests/intel/xe_coredump.c

diff --git a/tests/intel/xe_coredump.c b/tests/intel/xe_coredump.c
new file mode 100644
index 000000000..9db79bb1d
--- /dev/null
+++ b/tests/intel/xe_coredump.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+/**
+ * TEST: Check devcoredump functionality
+ * Category: Software building block
+ * Sub-category: devcoredump
+ * Test category: functionality test
+ * Run type: BAT
+ * Description: Test devcoredump functionality and dumping work as intended.
+ */
+
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <glob.h>
+#include <wchar.h>
+
+#include "igt.h"
+#include "igt_device.h"
+#include "igt_io.h"
+#include "igt_syncobj.h"
+#include "igt_sysfs.h"
+
+#include "intel_pat.h"
+
+#include "xe_drm.h"
+#include "xe/xe_ioctl.h"
+#include "xe/xe_query.h"
+
+static struct xe_device *xe;
+static uint32_t batch_bo;
+static uint32_t *batch;
+static void *userptr;
+static uint32_t vm;
+
+#define MAX_N_ENGINES 32
+
+/*
+ * Helper to read and clear devcore.  We want to read it completely to ensure
+ * we catch any kernel side regressions like:
+ * https://gitlab.freedesktop.org/drm/msm/-/issues/20
+ */
+
+static void
+read_and_clear_devcore(bool match)
+{
+	glob_t glob_buf = {0};
+	int ret, i;
+
+	ret = glob("/sys/class/devcoredump/devcd*/data", GLOB_NOSORT, NULL, &glob_buf);
+	if ((ret == GLOB_NOMATCH) || !glob_buf.gl_pathc)
+		return;
+
+	for (i = 0; i < glob_buf.gl_pathc; i++) {
+		char buf[0x1000];
+		int fd = open(glob_buf.gl_pathv[i], O_RDWR);
+
+		if (fd == -1)
+			continue;
+
+		/*
+		 * We want to read the entire file but we can throw away the
+		 * contents.. we just want to make sure that we exercise the
+		 * kernel side codepaths hit when reading the devcore from
+		 * sysfs
+		 */
+		igt_debug("---- begin coredump ----\n");
+		do {
+			memset(buf, 0, sizeof(buf));
+			ret = igt_readn(fd, buf, sizeof(buf) - 1);
+			igt_debug("%s", buf);
+		} while (ret > 0);
+
+		igt_debug("---- end coredump ----\n");
+
+		/* Clear the devcore: */
+		igt_writen(fd, "1", 1);
+
+		close(fd);
+		match = false;
+	}
+
+	globfree(&glob_buf);
+	igt_assert_f(!match, "No devcoredump found.\n");
+}
+
+/**
+ * SUBTEST: %s
+ * Description: Hang the GPU, and read out the resulting devcoredump.
+ * Test category: functionality test
+ */
+static void
+do_hang_test(bool all)
+{
+	uint32_t *ptr = batch;
+	uint64_t offset = xe->default_alignment - 4;
+	struct drm_xe_engine_class_instance *hwe;
+	uint32_t engines[MAX_N_ENGINES], syncobjs[MAX_N_ENGINES];
+	int i = 0;
+
+	memset(batch, 0, xe->default_alignment);
+	*(ptr++) = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_GTE_SDD;
+	*(ptr++) = 1;
+	*(ptr++) = offset >> 32;
+	*(ptr++) = offset;
+	*(ptr++) = MI_BATCH_BUFFER_END;
+
+	xe_for_each_engine(xe->fd, hwe) {
+		struct drm_xe_sync sync = {
+			.type = DRM_XE_SYNC_TYPE_SYNCOBJ,
+			.flags = DRM_XE_SYNC_FLAG_SIGNAL,
+		};
+
+		sync.handle = syncobjs[i] = syncobj_create(xe->fd, 0);
+		engines[i] = xe_exec_queue_create(xe->fd, vm, hwe, 0);
+		xe_exec_sync(xe->fd, engines[i], 0, &sync, 1);
+		i++;
+		if (!all)
+			break;
+	}
+
+	/* Ensure all syncobjs are in a failed state */
+	while (i--) {
+		syncobj_wait(xe->fd, &syncobjs[i], 1, INT64_MAX, 0, NULL);
+		syncobj_destroy(xe->fd, syncobjs[i]);
+	}
+}
+
+igt_main
+{
+	igt_fixture {
+		struct drm_xe_sync sync = {
+			.type = DRM_XE_SYNC_TYPE_SYNCOBJ,
+			.flags = DRM_XE_SYNC_FLAG_SIGNAL,
+		};
+		struct drm_xe_vm_bind_op bind_ops[2] = { };
+
+		int fd = drm_open_driver_render(DRIVER_XE);
+		xe = xe_device_get(fd);
+
+		/* Before test, ensure devcoredump is empty */
+		read_and_clear_devcore(false);
+
+		vm = xe_vm_create(fd, 0, 0);
+		batch_bo = xe_bo_create(fd, vm, xe->default_alignment, system_memory(fd), 0);
+		batch = xe_bo_map(xe->fd, batch_bo, xe->default_alignment);
+
+		userptr = mmap(0, xe->default_alignment, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+		wmemset(userptr, 0xf1234567, xe->default_alignment / sizeof(wchar_t));
+
+		bind_ops[0].op = DRM_XE_VM_BIND_OP_MAP;
+		bind_ops[0].obj = batch_bo;
+		bind_ops[0].addr = 0;
+
+		bind_ops[1].op = DRM_XE_VM_BIND_OP_MAP_USERPTR;
+		bind_ops[1].userptr = (size_t)userptr;
+		bind_ops[1].addr = 1ULL << 40ULL;
+
+		bind_ops[0].flags = bind_ops[1].flags = DRM_XE_VM_BIND_FLAG_DUMPABLE;
+		bind_ops[0].range = bind_ops[1].range = xe->default_alignment;
+		bind_ops[0].pat_index = bind_ops[1].pat_index = intel_get_pat_idx_wb(fd);
+
+		sync.handle = syncobj_create(fd, 0);
+		xe_vm_bind_array(fd, vm, 0, bind_ops, ARRAY_SIZE(bind_ops), &sync, 1);
+		syncobj_wait(fd, &sync.handle, 1, INT64_MAX, 0, NULL);
+		syncobj_destroy(fd, sync.handle);
+
+	}
+
+	igt_describe("Test that hw fault coredump readout works");
+	igt_subtest("basic") {
+		do_hang_test(false);
+
+		read_and_clear_devcore(true);
+	}
+
+	igt_describe("Hang all engines simultaneously");
+	igt_subtest("all-simultaneously") {
+		do_hang_test(true);
+
+		read_and_clear_devcore(true);
+	}
+}
+
diff --git a/tests/meson.build b/tests/meson.build
index a6a8498e2..cb19627e1 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -279,6 +279,7 @@ intel_xe_progs = [
 	'xe_create',
 	'xe_compute',
 	'xe_copy_basic',
+	'xe_coredump',
 	'xe_dma_buf_sync',
 	'xe_debugfs',
 	'xe_drm_fdinfo',
-- 
2.40.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes.
  2024-01-12 12:40 [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes Maarten Lankhorst
  2024-01-12 12:40 ` [PATCH 2/2] tests/intel: Add xe_coredump test Maarten Lankhorst
@ 2024-01-12 13:09 ` Francois Dugast
  2024-01-12 13:39 ` ✗ Fi.CI.BUILD: failure for series starting with [1/2] " Patchwork
  2024-01-12 16:05 ` [PATCH 1/2] " Kamil Konieczny
  3 siblings, 0 replies; 8+ messages in thread
From: Francois Dugast @ 2024-01-12 13:09 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: igt-dev

Hi Maarten,

On Fri, Jan 12, 2024 at 01:40:42PM +0100, Maarten Lankhorst wrote:
> Required for devcoredump test.
> 
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>  include/drm-uapi/xe_drm.h | 10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/include/drm-uapi/xe_drm.h b/include/drm-uapi/xe_drm.h
> index bacdca787..0f975f57f 100644
> --- a/include/drm-uapi/xe_drm.h
> +++ b/include/drm-uapi/xe_drm.h
> @@ -3,8 +3,8 @@
>   * Copyright © 2023 Intel Corporation
>   */
>  
> -#ifndef _XE_DRM_H_
> -#define _XE_DRM_H_
> +#ifndef _UAPI_XE_DRM_H_
> +#define _UAPI_XE_DRM_H_

This ^ and __user below seem to indicate the header was copied from
KMD sources instead of being generated, for example with:

    $ make headers_install INSTALL_HDR_PATH=/tmp/headers

See https://www.kernel.org/doc/html/latest/kbuild/headers_install.html

Francois

>  
>  #include "drm.h"
>  
> @@ -130,7 +130,7 @@ extern "C" {
>   * redefine the interface more easily than an ever growing struct of
>   * increasing complexity, and for large parts of that interface to be
>   * entirely optional. The downside is more pointer chasing; chasing across
> - * the boundary with pointers encapsulated inside u64.
> + * the __user boundary with pointers encapsulated inside u64.
>   *
>   * Example chaining:
>   *
> @@ -832,7 +832,6 @@ struct drm_xe_vm_destroy {
>   *
>   * and the @flags can be:
>   *  - %DRM_XE_VM_BIND_FLAG_READONLY
> - *  - %DRM_XE_VM_BIND_FLAG_ASYNC
>   *  - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the
>   *    MAP operation immediately rather than deferring the MAP to the page
>   *    fault handler.
> @@ -931,6 +930,7 @@ struct drm_xe_vm_bind_op {
>  #define DRM_XE_VM_BIND_FLAG_READONLY	(1 << 0)
>  #define DRM_XE_VM_BIND_FLAG_IMMEDIATE	(1 << 1)
>  #define DRM_XE_VM_BIND_FLAG_NULL	(1 << 2)
> +#define DRM_XE_VM_BIND_FLAG_DUMPABLE	(1 << 3)
>  	/** @flags: Bind flags */
>  	__u32 flags;
>  
> @@ -1344,4 +1344,4 @@ struct drm_xe_wait_user_fence {
>  }
>  #endif
>  
> -#endif /* _XE_DRM_H_ */
> +#endif /* _UAPI_XE_DRM_H_ */
> -- 
> 2.40.1
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* ✗ Fi.CI.BUILD: failure for series starting with [1/2] tests/intel: Sync headers to grab DUMPABLE changes.
  2024-01-12 12:40 [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes Maarten Lankhorst
  2024-01-12 12:40 ` [PATCH 2/2] tests/intel: Add xe_coredump test Maarten Lankhorst
  2024-01-12 13:09 ` [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes Francois Dugast
@ 2024-01-12 13:39 ` Patchwork
  2024-01-12 16:05 ` [PATCH 1/2] " Kamil Konieczny
  3 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2024-01-12 13:39 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: igt-dev

== Series Details ==

Series: series starting with [1/2] tests/intel: Sync headers to grab DUMPABLE changes.
URL   : https://patchwork.freedesktop.org/series/128716/
State : failure

== Summary ==

IGT patchset build failed on latest successful build
16d7484a28cc1aaf2dfb9e9c73c011daf602d926 tests/intel/xe_exec_store: don't pass gt_id as bind queue

Tail of build.log:
[1615/1664] Linking target tools/intel_panel_fitter.
[1616/1664] Linking target tools/intel_stepping.
[1617/1664] Linking target tools/intel_gem_info.
[1618/1664] Linking target tools/intel_firmware_decode.
[1619/1664] Linking target tools/intel_pm_rpm.
[1620/1664] Linking target tools/intel_watermark.
[1621/1664] Linking target tools/intel_gvtg_test.
[1622/1664] Linking target runner/testdata/abort-simple.
[1623/1664] Linking target tools/amd_hdmi_compliance.
[1624/1664] Generating gem_stress.testlist with a meson_exe.py custom command.
[1625/1664] Linking target runner/igt_comms_decoder.
[1626/1664] Linking target tools/msm_dp_compliance.
[1627/1664] Linking target tools/intel_l3_parity.
[1628/1664] Linking target runner/testdata/no-subtests.
[1629/1664] Linking target runner/testdata/abort.
[1630/1664] Linking target runner/testdata/skippers.
[1631/1664] Linking target tools/intel_reg.
[1632/1664] Linking target runner/testdata/abort-dynamic.
[1633/1664] Linking target runner/runner_json_test.
[1634/1664] Linking target runner/testdata/successtest.
[1635/1664] Linking target tools/lsgpu.
[1636/1664] Linking target runner/testdata/dynamic.
[1637/1664] Linking target runner/igt_runner.
[1638/1664] Linking target runner/igt_resume.
[1639/1664] Linking target runner/testdata/abort-fixture.
[1640/1664] Linking target runner/igt_results.
[1641/1664] Compiling C object 'lib/76b5a35@@i915_perf@sha/meson-generated_.._i915_perf_metrics_mtlgt3.c.o'.
[1642/1664] Compiling C object 'lib/76b5a35@@i915_perf@sha/meson-generated_.._i915_perf_metrics_acmgt1.c.o'.
[1643/1664] Compiling C object 'runner/527aa9f@@runner_test@exe/runner_tests.c.o'.
[1644/1664] Linking target runner/runner_test.
[1645/1664] Compiling C object 'lib/76b5a35@@i915_perf@sha/meson-generated_.._i915_perf_metrics_acmgt2.c.o'.
[1646/1664] Compiling C object 'lib/76b5a35@@i915_perf@sha/meson-generated_.._i915_perf_metrics_acmgt3.c.o'.
[1647/1664] Linking target lib/libi915_perf.so.1.5.
[1648/1664] Generating symbol file 'lib/76b5a35@@i915_perf@sha/libi915_perf.so.1.5.symbols'.
[1649/1664] Linking target tools/i915-perf/i915-perf-configs.
[1650/1664] Linking target tools/i915-perf/i915-perf-reader.
[1651/1664] Linking target tools/i915-perf/i915-perf-recorder.
[1652/1664] Linking target tests/gem_barrier_race.
[1653/1664] Linking target tests/core_hotunplug.
[1654/1664] Linking target tests/perf.
[1655/1664] Generating perf.testlist with a meson_exe.py custom command.
[1656/1664] Generating gem_barrier_race.testlist with a meson_exe.py custom command.
[1657/1664] Generating core_hotunplug.testlist with a meson_exe.py custom command.
[1658/1664] Generating xe_tests.rst with a custom command.
FAILED: docs/testplan/xe_tests.rst 
/usr/src/igt-gpu-tools/scripts/igt_doc.py --config /usr/src/igt-gpu-tools/tests/intel/xe_test_config.json --rest docs/testplan/xe_tests.rst --check-testlist --igt-build-path /opt/igt/build
/usr/src/igt-gpu-tools/tests/intel/xe_coredump.c:subtest igt@xe_coredump@%s needs 1 arguments but only 0 are defined.
[1659/1664] Generating i915_tests.rst with a custom command.
[1660/1664] Generating kms_tests.rst with a custom command.
ninja: build stopped: subcommand failed.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes.
  2024-01-12 12:40 [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes Maarten Lankhorst
                   ` (2 preceding siblings ...)
  2024-01-12 13:39 ` ✗ Fi.CI.BUILD: failure for series starting with [1/2] " Patchwork
@ 2024-01-12 16:05 ` Kamil Konieczny
  3 siblings, 0 replies; 8+ messages in thread
From: Kamil Konieczny @ 2024-01-12 16:05 UTC (permalink / raw)
  To: igt-dev

Hi Maarten,
On 2024-01-12 at 13:40:42 +0100, Maarten Lankhorst wrote:
> Required for devcoredump test.

please change subject and commit description to something like
in commit bc36d4f06ca0557371fda2e97361147d6ac4a3e6, so for subject:

drm-uapi/xe: Sync with DUMPABLE vm_bind flag

and for description cite commit from kernel (taken from above example):

Align with commit ("drm/xe/uapi: Remove sync binds")

If it was not accepted and you need it for testing, declare it in
test and send proper uapi/xe sync later.
Other way would be to introduce lib/xe/xe_drm_local.h like similar
lib/i915/i915_drm_local.h for i915.

+cc Francois

Regards,
Kamil

> 
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>  include/drm-uapi/xe_drm.h | 10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/include/drm-uapi/xe_drm.h b/include/drm-uapi/xe_drm.h
> index bacdca787..0f975f57f 100644
> --- a/include/drm-uapi/xe_drm.h
> +++ b/include/drm-uapi/xe_drm.h
> @@ -3,8 +3,8 @@
>   * Copyright © 2023 Intel Corporation
>   */
>  
> -#ifndef _XE_DRM_H_
> -#define _XE_DRM_H_
> +#ifndef _UAPI_XE_DRM_H_
> +#define _UAPI_XE_DRM_H_
>  
>  #include "drm.h"
>  
> @@ -130,7 +130,7 @@ extern "C" {
>   * redefine the interface more easily than an ever growing struct of
>   * increasing complexity, and for large parts of that interface to be
>   * entirely optional. The downside is more pointer chasing; chasing across
> - * the boundary with pointers encapsulated inside u64.
> + * the __user boundary with pointers encapsulated inside u64.
>   *
>   * Example chaining:
>   *
> @@ -832,7 +832,6 @@ struct drm_xe_vm_destroy {
>   *
>   * and the @flags can be:
>   *  - %DRM_XE_VM_BIND_FLAG_READONLY
> - *  - %DRM_XE_VM_BIND_FLAG_ASYNC
>   *  - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the
>   *    MAP operation immediately rather than deferring the MAP to the page
>   *    fault handler.
> @@ -931,6 +930,7 @@ struct drm_xe_vm_bind_op {
>  #define DRM_XE_VM_BIND_FLAG_READONLY	(1 << 0)
>  #define DRM_XE_VM_BIND_FLAG_IMMEDIATE	(1 << 1)
>  #define DRM_XE_VM_BIND_FLAG_NULL	(1 << 2)
> +#define DRM_XE_VM_BIND_FLAG_DUMPABLE	(1 << 3)
>  	/** @flags: Bind flags */
>  	__u32 flags;
>  
> @@ -1344,4 +1344,4 @@ struct drm_xe_wait_user_fence {
>  }
>  #endif
>  
> -#endif /* _XE_DRM_H_ */
> +#endif /* _UAPI_XE_DRM_H_ */
> -- 
> 2.40.1
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] tests/intel: Add xe_coredump test
  2024-01-12 12:40 ` [PATCH 2/2] tests/intel: Add xe_coredump test Maarten Lankhorst
@ 2024-01-12 16:53   ` Kamil Konieczny
  2024-01-15 12:34     ` Maarten Lankhorst
  0 siblings, 1 reply; 8+ messages in thread
From: Kamil Konieczny @ 2024-01-12 16:53 UTC (permalink / raw)
  To: igt-dev

Hi Maarten,
On 2024-01-12 at 13:40:43 +0100, Maarten Lankhorst wrote:
> Add a simple test that forces a GPU hang and then reads the resulting
> devcoredump file. Map a single userptr and BO, and dump the contents of
> those.
> 
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>  tests/intel/xe_coredump.c | 188 ++++++++++++++++++++++++++++++++++++++
>  tests/meson.build         |   1 +
>  2 files changed, 189 insertions(+)
>  create mode 100644 tests/intel/xe_coredump.c
> 
> diff --git a/tests/intel/xe_coredump.c b/tests/intel/xe_coredump.c
> new file mode 100644
> index 000000000..9db79bb1d
> --- /dev/null
> +++ b/tests/intel/xe_coredump.c
> @@ -0,0 +1,188 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2023 Intel Corporation
> + */
> +
> +/**
> + * TEST: Check devcoredump functionality
> + * Category: Software building block
> + * Sub-category: devcoredump
> + * Test category: functionality test
> + * Run type: BAT
> + * Description: Test devcoredump functionality and dumping work as intended.

Document also new subtest(s) here.

> + */
> +
> +#include <string.h>
> +#include <unistd.h>
> +#include <fcntl.h>
> +#include <sys/stat.h>
> +#include <glob.h>
> +#include <wchar.h>

It should be sorted alphabetically but unistd.h is somewhat
special and should be first.

> +
> +#include "igt.h"
> +#include "igt_device.h"
> +#include "igt_io.h"
> +#include "igt_syncobj.h"
> +#include "igt_sysfs.h"
> +
> +#include "intel_pat.h"
> +
> +#include "xe_drm.h"
> +#include "xe/xe_ioctl.h"
> +#include "xe/xe_query.h"
> +
> +static struct xe_device *xe;
> +static uint32_t batch_bo;
> +static uint32_t *batch;
> +static void *userptr;
> +static uint32_t vm;
> +
> +#define MAX_N_ENGINES 32
> +
> +/*
> + * Helper to read and clear devcore.  We want to read it completely to ensure
> + * we catch any kernel side regressions like:
> + * https://gitlab.freedesktop.org/drm/msm/-/issues/20
> + */
> +
> +static void
> +read_and_clear_devcore(bool match)
--------------------------^^^^

Could you get rid of bool param? It is better to write two
separate functions, one for clearing and one for dumping.

> +{
> +	glob_t glob_buf = {0};
> +	int ret, i;
> +
> +	ret = glob("/sys/class/devcoredump/devcd*/data", GLOB_NOSORT, NULL, &glob_buf);
> +	if ((ret == GLOB_NOMATCH) || !glob_buf.gl_pathc)
> +		return;
> +
> +	for (i = 0; i < glob_buf.gl_pathc; i++) {
> +		char buf[0x1000];
> +		int fd = open(glob_buf.gl_pathv[i], O_RDWR);
> +
> +		if (fd == -1)
> +			continue;
> +
> +		/*
> +		 * We want to read the entire file but we can throw away the
> +		 * contents.. we just want to make sure that we exercise the
> +		 * kernel side codepaths hit when reading the devcore from
> +		 * sysfs
> +		 */
> +		igt_debug("---- begin coredump ----\n");
> +		do {
> +			memset(buf, 0, sizeof(buf));
----------- ^
It is better to do it once before loop.

> +			ret = igt_readn(fd, buf, sizeof(buf) - 1);
> +			igt_debug("%s", buf);

Consider limiting it to some reasonable size to not clobber CI.
If you want you could add option to test to write it all.

> +		} while (ret > 0);
> +
> +		igt_debug("---- end coredump ----\n");
> +
> +		/* Clear the devcore: */
> +		igt_writen(fd, "1", 1);

Is it enough to write "1" to devcore to clear it? Or should it
be read out to get rid off the data?

> +
> +		close(fd);
> +		match = false;

Better do it once, not in loop.

> +	}
> +

match = glob_buf.gl_pathc;
and later
	igt_assert_f(match > 0, "No devcoredump found.\n");

> +	globfree(&glob_buf);
> +	igt_assert_f(!match, "No devcoredump found.\n");
> +}
> +
> +/**
> + * SUBTEST: %s
-------------- ^
When using param you should later give its values but here it is
overcomplicating things, just name it:

 * SUBTEST: basic

> + * Description: Hang the GPU, and read out the resulting devcoredump.
-------------------------------^
No need for comma "," before "and".
Write in description what is the difference in second test.

> + * Test category: functionality test

Put here description for second test:

 * SUBTEST: all-simultaneously

with other fields filled.

> + */
> +static void
> +do_hang_test(bool all)
> +{
> +	uint32_t *ptr = batch;
> +	uint64_t offset = xe->default_alignment - 4;
> +	struct drm_xe_engine_class_instance *hwe;
> +	uint32_t engines[MAX_N_ENGINES], syncobjs[MAX_N_ENGINES];
> +	int i = 0;
> +
> +	memset(batch, 0, xe->default_alignment);
> +	*(ptr++) = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_GTE_SDD;
> +	*(ptr++) = 1;
> +	*(ptr++) = offset >> 32;
> +	*(ptr++) = offset;
> +	*(ptr++) = MI_BATCH_BUFFER_END;
> +
> +	xe_for_each_engine(xe->fd, hwe) {
> +		struct drm_xe_sync sync = {
> +			.type = DRM_XE_SYNC_TYPE_SYNCOBJ,
> +			.flags = DRM_XE_SYNC_FLAG_SIGNAL,
> +		};
> +
> +		sync.handle = syncobjs[i] = syncobj_create(xe->fd, 0);
> +		engines[i] = xe_exec_queue_create(xe->fd, vm, hwe, 0);
> +		xe_exec_sync(xe->fd, engines[i], 0, &sync, 1);
> +		i++;
> +		if (!all)
> +			break;
> +	}
> +
> +	/* Ensure all syncobjs are in a failed state */
> +	while (i--) {
> +		syncobj_wait(xe->fd, &syncobjs[i], 1, INT64_MAX, 0, NULL);
> +		syncobj_destroy(xe->fd, syncobjs[i]);
> +	}
> +}
> +
> +igt_main
> +{
> +	igt_fixture {
> +		struct drm_xe_sync sync = {
> +			.type = DRM_XE_SYNC_TYPE_SYNCOBJ,
> +			.flags = DRM_XE_SYNC_FLAG_SIGNAL,
> +		};
> +		struct drm_xe_vm_bind_op bind_ops[2] = { };
> +
> +		int fd = drm_open_driver_render(DRIVER_XE);
> +		xe = xe_device_get(fd);
> +
> +		/* Before test, ensure devcoredump is empty */
> +		read_and_clear_devcore(false);
Better:
		clear_devcore();

> +
> +		vm = xe_vm_create(fd, 0, 0);
> +		batch_bo = xe_bo_create(fd, vm, xe->default_alignment, system_memory(fd), 0);
> +		batch = xe_bo_map(xe->fd, batch_bo, xe->default_alignment);
> +
> +		userptr = mmap(0, xe->default_alignment, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
> +		wmemset(userptr, 0xf1234567, xe->default_alignment / sizeof(wchar_t));
> +
> +		bind_ops[0].op = DRM_XE_VM_BIND_OP_MAP;
> +		bind_ops[0].obj = batch_bo;
> +		bind_ops[0].addr = 0;
> +
> +		bind_ops[1].op = DRM_XE_VM_BIND_OP_MAP_USERPTR;
> +		bind_ops[1].userptr = (size_t)userptr;
> +		bind_ops[1].addr = 1ULL << 40ULL;
> +
> +		bind_ops[0].flags = bind_ops[1].flags = DRM_XE_VM_BIND_FLAG_DUMPABLE;
> +		bind_ops[0].range = bind_ops[1].range = xe->default_alignment;
> +		bind_ops[0].pat_index = bind_ops[1].pat_index = intel_get_pat_idx_wb(fd);
> +
> +		sync.handle = syncobj_create(fd, 0);
> +		xe_vm_bind_array(fd, vm, 0, bind_ops, ARRAY_SIZE(bind_ops), &sync, 1);
> +		syncobj_wait(fd, &sync.handle, 1, INT64_MAX, 0, NULL);
> +		syncobj_destroy(fd, sync.handle);
> +
> +	}
> +
> +	igt_describe("Test that hw fault coredump readout works");
> +	igt_subtest("basic") {
> +		do_hang_test(false);
> +
> +		read_and_clear_devcore(true);
Better:
		read_and_clear_devcore();

> +	}
> +
> +	igt_describe("Hang all engines simultaneously");
> +	igt_subtest("all-simultaneously") {
> +		do_hang_test(true);
> +
> +		read_and_clear_devcore(true);

Same here:
		read_and_clear_devcore();

Regards,
Kamil

> +	}
> +}
> +
> diff --git a/tests/meson.build b/tests/meson.build
> index a6a8498e2..cb19627e1 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -279,6 +279,7 @@ intel_xe_progs = [
>  	'xe_create',
>  	'xe_compute',
>  	'xe_copy_basic',
> +	'xe_coredump',
>  	'xe_dma_buf_sync',
>  	'xe_debugfs',
>  	'xe_drm_fdinfo',
> -- 
> 2.40.1
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] tests/intel: Add xe_coredump test
  2024-01-12 16:53   ` Kamil Konieczny
@ 2024-01-15 12:34     ` Maarten Lankhorst
  2024-01-15 15:47       ` Kamil Konieczny
  0 siblings, 1 reply; 8+ messages in thread
From: Maarten Lankhorst @ 2024-01-15 12:34 UTC (permalink / raw)
  To: Kamil Konieczny, igt-dev

Hey,

Den 2024-01-12 kl. 17:53, skrev Kamil Konieczny:
> Hi Maarten,
> On 2024-01-12 at 13:40:43 +0100, Maarten Lankhorst wrote:
>> Add a simple test that forces a GPU hang and then reads the resulting
>> devcoredump file. Map a single userptr and BO, and dump the contents of
>> those.
>>
>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>> ---
>>   tests/intel/xe_coredump.c | 188 ++++++++++++++++++++++++++++++++++++++
>>   tests/meson.build         |   1 +
>>   2 files changed, 189 insertions(+)
>>   create mode 100644 tests/intel/xe_coredump.c
>>
>> diff --git a/tests/intel/xe_coredump.c b/tests/intel/xe_coredump.c
>> new file mode 100644
>> index 000000000..9db79bb1d
>> --- /dev/null
>> +++ b/tests/intel/xe_coredump.c
>> @@ -0,0 +1,188 @@
>> +// SPDX-License-Identifier: MIT
>> +/*
>> + * Copyright © 2023 Intel Corporation
>> + */
>> +
>> +/**
>> + * TEST: Check devcoredump functionality
>> + * Category: Software building block
>> + * Sub-category: devcoredump
>> + * Test category: functionality test
>> + * Run type: BAT
>> + * Description: Test devcoredump functionality and dumping work as intended.
> Document also new subtest(s) here.
>
>> + */
>> +
>> +#include <string.h>
>> +#include <unistd.h>
>> +#include <fcntl.h>
>> +#include <sys/stat.h>
>> +#include <glob.h>
>> +#include <wchar.h>
> It should be sorted alphabetically but unistd.h is somewhat
> special and should be first.
Okay.
>
>> +
>> +#include "igt.h"
>> +#include "igt_device.h"
>> +#include "igt_io.h"
>> +#include "igt_syncobj.h"
>> +#include "igt_sysfs.h"
>> +
>> +#include "intel_pat.h"
>> +
>> +#include "xe_drm.h"
>> +#include "xe/xe_ioctl.h"
>> +#include "xe/xe_query.h"
>> +
>> +static struct xe_device *xe;
>> +static uint32_t batch_bo;
>> +static uint32_t *batch;
>> +static void *userptr;
>> +static uint32_t vm;
>> +
>> +#define MAX_N_ENGINES 32
>> +
>> +/*
>> + * Helper to read and clear devcore.  We want to read it completely to ensure
>> + * we catch any kernel side regressions like:
>> + * https://gitlab.freedesktop.org/drm/msm/-/issues/20
>> + */
>> +
>> +static void
>> +read_and_clear_devcore(bool match)
> --------------------------^^^^
>
> Could you get rid of bool param? It is better to write two
> separate functions, one for clearing and one for dumping.
This function does not really care what the contents of the coredump 
are, just that it can be read without a kernel panic.
>> +{
>> +	glob_t glob_buf = {0};
>> +	int ret, i;
>> +
>> +	ret = glob("/sys/class/devcoredump/devcd*/data", GLOB_NOSORT, NULL, &glob_buf);
>> +	if ((ret == GLOB_NOMATCH) || !glob_buf.gl_pathc)
>> +		return;
>> +
>> +	for (i = 0; i < glob_buf.gl_pathc; i++) {
>> +		char buf[0x1000];
>> +		int fd = open(glob_buf.gl_pathv[i], O_RDWR);
>> +
>> +		if (fd == -1)
>> +			continue;
>> +
>> +		/*
>> +		 * We want to read the entire file but we can throw away the
>> +		 * contents.. we just want to make sure that we exercise the
>> +		 * kernel side codepaths hit when reading the devcore from
>> +		 * sysfs
>> +		 */
>> +		igt_debug("---- begin coredump ----\n");
>> +		do {
>> +			memset(buf, 0, sizeof(buf));
> ----------- ^
> It is better to do it once before loop.

Yeah, except there may be multiple devcoredump capable devices, which is 
why we clear all at the beginning of the test.

I should check the device path instead.

>
>> +			ret = igt_readn(fd, buf, sizeof(buf) - 1);
>> +			igt_debug("%s", buf);
> Consider limiting it to some reasonable size to not clobber CI.
> If you want you could add option to test to write it all.
>
>> +		} while (ret > 0);
>> +
>> +		igt_debug("---- end coredump ----\n");
>> +
>> +		/* Clear the devcore: */
>> +		igt_writen(fd, "1", 1);
> Is it enough to write "1" to devcore to clear it? Or should it
> be read out to get rid off the data?
The readout is to ensure the actual dumping part works succesfully, or 
at least does not cause a kernel panic.
>> +
>> +		close(fd);
>> +		match = false;
> Better do it once, not in loop.
>
>> +	}
>> +
> match = glob_buf.gl_pathc;
> and later
> 	igt_assert_f(match > 0, "No devcoredump found.\n");
>
>> +	globfree(&glob_buf);
>> +	igt_assert_f(!match, "No devcoredump found.\n");
>> +}
>> +
>> +/**
>> + * SUBTEST: %s
> -------------- ^
> When using param you should later give its values but here it is
> overcomplicating things, just name it:
>
>   * SUBTEST: basic
>> + * Description: Hang the GPU, and read out the resulting devcoredump.
> -------------------------------^
> No need for comma "," before "and".
> Write in description what is the difference in second test.
>
>> + * Test category: functionality test
> Put here description for second test:
>
>   * SUBTEST: all-simultaneously
>
> with other fields filled.
Yeah, wasn't sure how to add same description for 2 tests.
>> + */
>> +static void
>> +do_hang_test(bool all)
>> +{
>> +	uint32_t *ptr = batch;
>> +	uint64_t offset = xe->default_alignment - 4;
>> +	struct drm_xe_engine_class_instance *hwe;
>> +	uint32_t engines[MAX_N_ENGINES], syncobjs[MAX_N_ENGINES];
>> +	int i = 0;
>> +
>> +	memset(batch, 0, xe->default_alignment);
>> +	*(ptr++) = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_GTE_SDD;
>> +	*(ptr++) = 1;
>> +	*(ptr++) = offset >> 32;
>> +	*(ptr++) = offset;
>> +	*(ptr++) = MI_BATCH_BUFFER_END;
>> +
>> +	xe_for_each_engine(xe->fd, hwe) {
>> +		struct drm_xe_sync sync = {
>> +			.type = DRM_XE_SYNC_TYPE_SYNCOBJ,
>> +			.flags = DRM_XE_SYNC_FLAG_SIGNAL,
>> +		};
>> +
>> +		sync.handle = syncobjs[i] = syncobj_create(xe->fd, 0);
>> +		engines[i] = xe_exec_queue_create(xe->fd, vm, hwe, 0);
>> +		xe_exec_sync(xe->fd, engines[i], 0, &sync, 1);
>> +		i++;
>> +		if (!all)
>> +			break;
>> +	}
>> +
>> +	/* Ensure all syncobjs are in a failed state */
>> +	while (i--) {
>> +		syncobj_wait(xe->fd, &syncobjs[i], 1, INT64_MAX, 0, NULL);
>> +		syncobj_destroy(xe->fd, syncobjs[i]);
>> +	}
>> +}
>> +
>> +igt_main
>> +{
>> +	igt_fixture {
>> +		struct drm_xe_sync sync = {
>> +			.type = DRM_XE_SYNC_TYPE_SYNCOBJ,
>> +			.flags = DRM_XE_SYNC_FLAG_SIGNAL,
>> +		};
>> +		struct drm_xe_vm_bind_op bind_ops[2] = { };
>> +
>> +		int fd = drm_open_driver_render(DRIVER_XE);
>> +		xe = xe_device_get(fd);
>> +
>> +		/* Before test, ensure devcoredump is empty */
>> +		read_and_clear_devcore(false);
> Better:
> 		clear_devcore();
>
>> +
>> +		vm = xe_vm_create(fd, 0, 0);
>> +		batch_bo = xe_bo_create(fd, vm, xe->default_alignment, system_memory(fd), 0);
>> +		batch = xe_bo_map(xe->fd, batch_bo, xe->default_alignment);
>> +
>> +		userptr = mmap(0, xe->default_alignment, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
>> +		wmemset(userptr, 0xf1234567, xe->default_alignment / sizeof(wchar_t));
>> +
>> +		bind_ops[0].op = DRM_XE_VM_BIND_OP_MAP;
>> +		bind_ops[0].obj = batch_bo;
>> +		bind_ops[0].addr = 0;
>> +
>> +		bind_ops[1].op = DRM_XE_VM_BIND_OP_MAP_USERPTR;
>> +		bind_ops[1].userptr = (size_t)userptr;
>> +		bind_ops[1].addr = 1ULL << 40ULL;
>> +
>> +		bind_ops[0].flags = bind_ops[1].flags = DRM_XE_VM_BIND_FLAG_DUMPABLE;
>> +		bind_ops[0].range = bind_ops[1].range = xe->default_alignment;
>> +		bind_ops[0].pat_index = bind_ops[1].pat_index = intel_get_pat_idx_wb(fd);
>> +
>> +		sync.handle = syncobj_create(fd, 0);
>> +		xe_vm_bind_array(fd, vm, 0, bind_ops, ARRAY_SIZE(bind_ops), &sync, 1);
>> +		syncobj_wait(fd, &sync.handle, 1, INT64_MAX, 0, NULL);
>> +		syncobj_destroy(fd, sync.handle);
>> +
>> +	}
>> +
>> +	igt_describe("Test that hw fault coredump readout works");
>> +	igt_subtest("basic") {
>> +		do_hang_test(false);
>> +
>> +		read_and_clear_devcore(true);
> Better:
> 		read_and_clear_devcore();
>
>> +	}
>> +
>> +	igt_describe("Hang all engines simultaneously");
>> +	igt_subtest("all-simultaneously") {
>> +		do_hang_test(true);
>> +
>> +		read_and_clear_devcore(true);
> Same here:
> 		read_and_clear_devcore();

Thanks, I'll try to clear up the testcase.

Cheers,

Maarten

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] tests/intel: Add xe_coredump test
  2024-01-15 12:34     ` Maarten Lankhorst
@ 2024-01-15 15:47       ` Kamil Konieczny
  0 siblings, 0 replies; 8+ messages in thread
From: Kamil Konieczny @ 2024-01-15 15:47 UTC (permalink / raw)
  To: igt-dev

Hi Maarten,
On 2024-01-15 at 13:34:33 +0100, Maarten Lankhorst wrote:
> Hey,
> 
> Den 2024-01-12 kl. 17:53, skrev Kamil Konieczny:
> > Hi Maarten,
> > On 2024-01-12 at 13:40:43 +0100, Maarten Lankhorst wrote:
> > > Add a simple test that forces a GPU hang and then reads the resulting
> > > devcoredump file. Map a single userptr and BO, and dump the contents of
> > > those.
> > > 
> > > Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> > > ---
> > >   tests/intel/xe_coredump.c | 188 ++++++++++++++++++++++++++++++++++++++
> > >   tests/meson.build         |   1 +
> > >   2 files changed, 189 insertions(+)
> > >   create mode 100644 tests/intel/xe_coredump.c
> > > 
> > > diff --git a/tests/intel/xe_coredump.c b/tests/intel/xe_coredump.c
> > > new file mode 100644
> > > index 000000000..9db79bb1d
> > > --- /dev/null
> > > +++ b/tests/intel/xe_coredump.c
> > > @@ -0,0 +1,188 @@
> > > +// SPDX-License-Identifier: MIT
> > > +/*
> > > + * Copyright © 2023 Intel Corporation
> > > + */
> > > +
> > > +/**
> > > + * TEST: Check devcoredump functionality
> > > + * Category: Software building block
> > > + * Sub-category: devcoredump
> > > + * Test category: functionality test
> > > + * Run type: BAT
> > > + * Description: Test devcoredump functionality and dumping work as intended.
> > Document also new subtest(s) here.
> > 
> > > + */
> > > +
> > > +#include <string.h>
> > > +#include <unistd.h>
> > > +#include <fcntl.h>
> > > +#include <sys/stat.h>
> > > +#include <glob.h>
> > > +#include <wchar.h>
> > It should be sorted alphabetically but unistd.h is somewhat
> > special and should be first.
> Okay.
> > 
> > > +
> > > +#include "igt.h"
> > > +#include "igt_device.h"
> > > +#include "igt_io.h"
> > > +#include "igt_syncobj.h"
> > > +#include "igt_sysfs.h"
> > > +
> > > +#include "intel_pat.h"
> > > +
> > > +#include "xe_drm.h"
> > > +#include "xe/xe_ioctl.h"
> > > +#include "xe/xe_query.h"
> > > +
> > > +static struct xe_device *xe;
> > > +static uint32_t batch_bo;
> > > +static uint32_t *batch;
> > > +static void *userptr;
> > > +static uint32_t vm;
> > > +
> > > +#define MAX_N_ENGINES 32
> > > +
> > > +/*
> > > + * Helper to read and clear devcore.  We want to read it completely to ensure
> > > + * we catch any kernel side regressions like:
> > > + * https://gitlab.freedesktop.org/drm/msm/-/issues/20
> > > + */
> > > +
> > > +static void
> > > +read_and_clear_devcore(bool match)
> > --------------------------^^^^
> > 
> > Could you get rid of bool param? It is better to write two
> > separate functions, one for clearing and one for dumping.
> This function does not really care what the contents of the coredump are,
> just that it can be read without a kernel panic.

Ok, but imho it should read it after clearing:

preparations:
    clear all devcores
test:
    casue hang
    read devcore

What I was talking about was that preparation step, no need to
read it out in that step. For that you could write clear_devcore()
with only clearing it with the fast method and no igt_asserts() there.
If you prefere you could debug print which devcores you cleared
(e.g. which one were not empty before executing your test).

> > > +{
> > > +	glob_t glob_buf = {0};
> > > +	int ret, i;
> > > +
> > > +	ret = glob("/sys/class/devcoredump/devcd*/data", GLOB_NOSORT, NULL, &glob_buf);
> > > +	if ((ret == GLOB_NOMATCH) || !glob_buf.gl_pathc)
> > > +		return;
> > > +
> > > +	for (i = 0; i < glob_buf.gl_pathc; i++) {
> > > +		char buf[0x1000];
> > > +		int fd = open(glob_buf.gl_pathv[i], O_RDWR);
> > > +
> > > +		if (fd == -1)
> > > +			continue;
> > > +
> > > +		/*
> > > +		 * We want to read the entire file but we can throw away the
> > > +		 * contents.. we just want to make sure that we exercise the
> > > +		 * kernel side codepaths hit when reading the devcore from
> > > +		 * sysfs
> > > +		 */
> > > +		igt_debug("---- begin coredump ----\n");
> > > +		do {
> > > +			memset(buf, 0, sizeof(buf));
> > ----------- ^
> > It is better to do it once before loop.
> 
> Yeah, except there may be multiple devcoredump capable devices, which is why
> we clear all at the beginning of the test.

Here I was talking about memset before reading into buf, it seems I should
look into igt_readn. Simplify it into:

    do { /* read devcore and write it with igt_debug() */
			ret = igt_readn(fd, buf, sizeof(buf) - 2);
            if (ret <= 0)
                break;
            buf[ret + 1] = 0;
	  		igt_debug("%s", buf);
    } while(true);

Regards,
Kamil

> 
> I should check the device path instead.
> 
> > 
> > > +			ret = igt_readn(fd, buf, sizeof(buf) - 1);
> > > +			igt_debug("%s", buf);
> > Consider limiting it to some reasonable size to not clobber CI.
> > If you want you could add option to test to write it all.
> > 
> > > +		} while (ret > 0);
> > > +
> > > +		igt_debug("---- end coredump ----\n");
> > > +
> > > +		/* Clear the devcore: */
> > > +		igt_writen(fd, "1", 1);
> > Is it enough to write "1" to devcore to clear it? Or should it
> > be read out to get rid off the data?
> The readout is to ensure the actual dumping part works succesfully, or at
> least does not cause a kernel panic.
> > > +
> > > +		close(fd);
> > > +		match = false;
> > Better do it once, not in loop.
> > 
> > > +	}
> > > +
> > match = glob_buf.gl_pathc;
> > and later
> > 	igt_assert_f(match > 0, "No devcoredump found.\n");
> > 
> > > +	globfree(&glob_buf);
> > > +	igt_assert_f(!match, "No devcoredump found.\n");
> > > +}
> > > +
> > > +/**
> > > + * SUBTEST: %s
> > -------------- ^
> > When using param you should later give its values but here it is
> > overcomplicating things, just name it:
> > 
> >   * SUBTEST: basic
> > > + * Description: Hang the GPU, and read out the resulting devcoredump.
> > -------------------------------^
> > No need for comma "," before "and".
> > Write in description what is the difference in second test.
> > 
> > > + * Test category: functionality test
> > Put here description for second test:
> > 
> >   * SUBTEST: all-simultaneously
> > 
> > with other fields filled.
> Yeah, wasn't sure how to add same description for 2 tests.
> > > + */
> > > +static void
> > > +do_hang_test(bool all)
> > > +{
> > > +	uint32_t *ptr = batch;
> > > +	uint64_t offset = xe->default_alignment - 4;
> > > +	struct drm_xe_engine_class_instance *hwe;
> > > +	uint32_t engines[MAX_N_ENGINES], syncobjs[MAX_N_ENGINES];
> > > +	int i = 0;
> > > +
> > > +	memset(batch, 0, xe->default_alignment);
> > > +	*(ptr++) = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_GTE_SDD;
> > > +	*(ptr++) = 1;
> > > +	*(ptr++) = offset >> 32;
> > > +	*(ptr++) = offset;
> > > +	*(ptr++) = MI_BATCH_BUFFER_END;
> > > +
> > > +	xe_for_each_engine(xe->fd, hwe) {
> > > +		struct drm_xe_sync sync = {
> > > +			.type = DRM_XE_SYNC_TYPE_SYNCOBJ,
> > > +			.flags = DRM_XE_SYNC_FLAG_SIGNAL,
> > > +		};
> > > +
> > > +		sync.handle = syncobjs[i] = syncobj_create(xe->fd, 0);
> > > +		engines[i] = xe_exec_queue_create(xe->fd, vm, hwe, 0);
> > > +		xe_exec_sync(xe->fd, engines[i], 0, &sync, 1);
> > > +		i++;
> > > +		if (!all)
> > > +			break;
> > > +	}
> > > +
> > > +	/* Ensure all syncobjs are in a failed state */
> > > +	while (i--) {
> > > +		syncobj_wait(xe->fd, &syncobjs[i], 1, INT64_MAX, 0, NULL);
> > > +		syncobj_destroy(xe->fd, syncobjs[i]);
> > > +	}
> > > +}
> > > +
> > > +igt_main
> > > +{
> > > +	igt_fixture {
> > > +		struct drm_xe_sync sync = {
> > > +			.type = DRM_XE_SYNC_TYPE_SYNCOBJ,
> > > +			.flags = DRM_XE_SYNC_FLAG_SIGNAL,
> > > +		};
> > > +		struct drm_xe_vm_bind_op bind_ops[2] = { };
> > > +
> > > +		int fd = drm_open_driver_render(DRIVER_XE);
> > > +		xe = xe_device_get(fd);
> > > +
> > > +		/* Before test, ensure devcoredump is empty */
> > > +		read_and_clear_devcore(false);
> > Better:
> > 		clear_devcore();
> > 
> > > +
> > > +		vm = xe_vm_create(fd, 0, 0);
> > > +		batch_bo = xe_bo_create(fd, vm, xe->default_alignment, system_memory(fd), 0);
> > > +		batch = xe_bo_map(xe->fd, batch_bo, xe->default_alignment);
> > > +
> > > +		userptr = mmap(0, xe->default_alignment, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
> > > +		wmemset(userptr, 0xf1234567, xe->default_alignment / sizeof(wchar_t));
> > > +
> > > +		bind_ops[0].op = DRM_XE_VM_BIND_OP_MAP;
> > > +		bind_ops[0].obj = batch_bo;
> > > +		bind_ops[0].addr = 0;
> > > +
> > > +		bind_ops[1].op = DRM_XE_VM_BIND_OP_MAP_USERPTR;
> > > +		bind_ops[1].userptr = (size_t)userptr;
> > > +		bind_ops[1].addr = 1ULL << 40ULL;
> > > +
> > > +		bind_ops[0].flags = bind_ops[1].flags = DRM_XE_VM_BIND_FLAG_DUMPABLE;
> > > +		bind_ops[0].range = bind_ops[1].range = xe->default_alignment;
> > > +		bind_ops[0].pat_index = bind_ops[1].pat_index = intel_get_pat_idx_wb(fd);
> > > +
> > > +		sync.handle = syncobj_create(fd, 0);
> > > +		xe_vm_bind_array(fd, vm, 0, bind_ops, ARRAY_SIZE(bind_ops), &sync, 1);
> > > +		syncobj_wait(fd, &sync.handle, 1, INT64_MAX, 0, NULL);
> > > +		syncobj_destroy(fd, sync.handle);
> > > +
> > > +	}
> > > +
> > > +	igt_describe("Test that hw fault coredump readout works");
> > > +	igt_subtest("basic") {
> > > +		do_hang_test(false);
> > > +
> > > +		read_and_clear_devcore(true);
> > Better:
> > 		read_and_clear_devcore();
> > 
> > > +	}
> > > +
> > > +	igt_describe("Hang all engines simultaneously");
> > > +	igt_subtest("all-simultaneously") {
> > > +		do_hang_test(true);
> > > +
> > > +		read_and_clear_devcore(true);
> > Same here:
> > 		read_and_clear_devcore();
> 
> Thanks, I'll try to clear up the testcase.
> 
> Cheers,
> 
> Maarten
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2024-01-15 15:47 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-01-12 12:40 [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes Maarten Lankhorst
2024-01-12 12:40 ` [PATCH 2/2] tests/intel: Add xe_coredump test Maarten Lankhorst
2024-01-12 16:53   ` Kamil Konieczny
2024-01-15 12:34     ` Maarten Lankhorst
2024-01-15 15:47       ` Kamil Konieczny
2024-01-12 13:09 ` [PATCH 1/2] tests/intel: Sync headers to grab DUMPABLE changes Francois Dugast
2024-01-12 13:39 ` ✗ Fi.CI.BUILD: failure for series starting with [1/2] " Patchwork
2024-01-12 16:05 ` [PATCH 1/2] " Kamil Konieczny

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox