From: Carlos Santa <carlos.santa@intel.com>
To: igt-dev@lists.freedesktop.org
Cc: Ursulin Tvrtko <tvrtko.ursulin@intel.com>
Subject: [igt-dev] [RFC v2] tests/gem_watchdog: Initial set of tests for GPU watchdog
Date: Thu, 18 Apr 2019 09:53:37 -0700 [thread overview]
Message-ID: <20190418165337.23438-1-carlos.santa@intel.com> (raw)
This patch adds a basic set of tests that reset the different
GPU engines through the GPU watchdog timer.
Credits to Antonio for the original codebase this is based on.
v2: remove gem_context_get_param() during set (Antonio)
remove clearing of the engines_threshold[] in the default case
inside context_set_watchdog(). (Antonio)
fix indexing when creating low/high priority contexts
get rid of 2 threads idea (Antonio)
fix context prio bug due to wrong indexing (Antonio)
Cc: Ursulin Tvrtko <tvrtko.ursulin@intel.com>
Cc: Antonio Argenziano <antonio.argenziano@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Carlos Santa <carlos.santa@intel.com>
---
tests/Makefile.sources | 3 +
tests/i915/gem_watchdog.c | 366 ++++++++++++++++++++++++++++++++++++++++++++++
tests/meson.build | 1 +
3 files changed, 370 insertions(+)
create mode 100644 tests/i915/gem_watchdog.c
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 214698d..7f17f20 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -444,6 +444,9 @@ gem_userptr_blits_SOURCES = i915/gem_userptr_blits.c
TESTS_progs += gem_wait
gem_wait_SOURCES = i915/gem_wait.c
+TESTS_progs += gem_watchdog
+gem_watchdog_SOURCES = i915/gem_watchdog.c
+
TESTS_progs += gem_workarounds
gem_workarounds_SOURCES = i915/gem_workarounds.c
diff --git a/tests/i915/gem_watchdog.c b/tests/i915/gem_watchdog.c
new file mode 100644
index 0000000..e6c1abe
--- /dev/null
+++ b/tests/i915/gem_watchdog.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#include "igt.h"
+#include "igt_sysfs.h"
+#include "sw_sync.h"
+
+#include <pthread.h>
+#include <fcntl.h>
+
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/signal.h>
+#include "i915/gem_ring.h"
+
+/*
+ * Local BSD ring selector bits. None of these three macros is referenced
+ * by the code in this file.
+ * NOTE(review): presumably mirrors the I915_EXEC_BSD_* execbuf flags - confirm.
+ */
+#define LOCAL_I915_EXEC_BSD_SHIFT (13)
+#define LOCAL_I915_EXEC_BSD_RING1 (1 << LOCAL_I915_EXEC_BSD_SHIFT)
+#define LOCAL_I915_EXEC_BSD_RING2 (2 << LOCAL_I915_EXEC_BSD_SHIFT)
+
+/* Priority values applied by create_ctx_with_priority() for HIGH and LOW. */
+#define MAX_PRIO LOCAL_I915_CONTEXT_MAX_USER_PRIORITY
+#define MIN_PRIO LOCAL_I915_CONTEXT_MIN_USER_PRIORITY
+/* Selectors accepted by create_ctx_with_priority(); any other value is ignored. */
+#define HIGH 1
+#define LOW 0
+/* Value stored in timeout_us by context_set_watchdog() callers. */
+#define WATCHDOG_THRESHOLD (100)
+/* Number of entries in the array handed to the watchdog context parameter. */
+#define MAX_ENGINES 5
+/* Indices into that array, one per engine class. */
+#define RENDER_CLASS 0
+#define VIDEO_DECODE_CLASS 1
+#define VIDEO_ENHANCEMENT_CLASS 2
+#define COPY_ENGINE_CLASS 3
+/* Context parameter id used to program the watchdog thresholds. */
+#define LOCAL_I915_CONTEXT_PARAM_WATCHDOG 0x10
+
+/*
+ * Base unit for the spinner timeouts used by the subtests.
+ * NOTE(review): the name implies 100 ms expressed in nanoseconds - confirm
+ * against the unit expected by igt_spin_batch_set_timeout().
+ */
+const uint64_t timeout_100ms = 100000000LL;
+/* Not referenced anywhere in this file. */
+int num;
+
+/*
+ * One watchdog configuration entry. context_set_watchdog() passes an array
+ * of MAX_ENGINES of these as the value of LOCAL_I915_CONTEXT_PARAM_WATCHDOG.
+ * The engine can be addressed either by class/instance or by a flat index;
+ * both forms share the same 32 bits through the anonymous union.
+ */
+struct drm_i915_gem_watchdog_timeout {
+ union {
+ struct {
+ /*
+ * Engine class & instance to be configured or queried.
+ */
+ __u16 engine_class;
+ __u16 engine_instance;
+ };
+ /* Index based addressing mode */
+ __u32 index;
+ };
+ /* GPU Engine watchdog resets timeout in us */
+ __u32 timeout_us;
+};
+
+/*
+ * clear_error_state:
+ * @fd: open DRM file descriptor
+ *
+ * Writes an empty string to the device's "error" sysfs attribute and closes
+ * the sysfs directory again. Silently does nothing when the sysfs directory
+ * cannot be opened.
+ */
+static void clear_error_state(int fd)
+{
+	const int sysfs = igt_sysfs_open(fd);
+
+	if (sysfs >= 0) {
+		/* Any write to the error state clears it */
+		igt_sysfs_set(sysfs, "error", "");
+		close(sysfs);
+	}
+}
+
+/*
+ * context_set_watchdog:
+ * @fd: open DRM file descriptor
+ * @engine_id: execbuf ring selector; only the I915_EXEC_RING_MASK bits are used
+ * @ctx_id: context whose watchdog thresholds are programmed
+ * @threshold: value stored in timeout_us for the selected engine class
+ *
+ * Builds a zeroed table of MAX_ENGINES watchdog entries, sets the threshold
+ * for the engine class matching @engine_id (render, video decode or video
+ * enhancement) and applies it through LOCAL_I915_CONTEXT_PARAM_WATCHDOG.
+ * For any other ring selector the all-zero table is applied unchanged.
+ */
+static void context_set_watchdog(int fd, int engine_id,
+				 unsigned ctx_id, unsigned threshold)
+{
+	struct drm_i915_gem_watchdog_timeout engines_threshold[MAX_ENGINES];
+	struct drm_i915_gem_context_param arg = {
+		.param = LOCAL_I915_CONTEXT_PARAM_WATCHDOG,
+		.ctx_id = ctx_id,
+		.size = sizeof(engines_threshold),
+		/*
+		 * to_user_pointer() converts via uintptr_t, like every other
+		 * user pointer in this file; a direct (uint64_t) cast of a
+		 * pointer is implementation-defined on 32-bit builds.
+		 */
+		.value = to_user_pointer(engines_threshold),
+	};
+
+	/* Every entry starts at 0; only the selected class is overwritten. */
+	memset(engines_threshold, 0, sizeof(engines_threshold));
+
+	switch (engine_id & I915_EXEC_RING_MASK) {
+	case I915_EXEC_RENDER:
+		engines_threshold[RENDER_CLASS].timeout_us = threshold;
+		break;
+	case I915_EXEC_BSD:
+		engines_threshold[VIDEO_DECODE_CLASS].timeout_us = threshold;
+		break;
+	case I915_EXEC_VEBOX:
+		engines_threshold[VIDEO_ENHANCEMENT_CLASS].timeout_us = threshold;
+		break;
+	default:
+		break;
+	}
+
+	gem_context_set_param(fd, &arg);
+}
+
+/*
+ * batch_buffer_factory:
+ * @fd: open DRM file descriptor
+ * @ctx_id: context the batch is submitted on (execbuf.rsvd1)
+ * @exec_id: execbuf ring selector, OR'ed with I915_EXEC_FENCE_OUT
+ * @target: handle of the object the batch relocates against
+ * @offset: dword offset inside @target used as the relocation delta
+ * @handle: when non-NULL, receives the new batch handle and nothing is submitted
+ * @timeout: timeout handed to igt_spin_batch_set_timeout()
+ * @fence: array receiving the out-fence fd of the submission
+ * @fence_index: slot of @fence to fill
+ *
+ * Creates a 4096 byte batch containing only MI_BATCH_BUFFER_END, with one
+ * relocation against @target. Unless @handle is given, a spinner that depends
+ * on @target is started, the batch is submitted with an out-fence, the spinner
+ * is freed and the batch object is closed. The GPU is quiesced on entry and
+ * again before returning from the submission path.
+ */
+static void batch_buffer_factory(uint32_t fd, uint32_t ctx_id, unsigned exec_id, uint32_t target, uint32_t offset, uint32_t *handle, uint64_t timeout, int *fence, int fence_index)
+{
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ igt_spin_t *spin = NULL;
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ /* Never modified: the relocation offset below is therefore always 0. */
+ int i = 0;
+
+ gem_quiescent_gpu(fd);
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ memset(&obj, 0, sizeof(obj));
+ memset(&reloc, 0, sizeof(reloc));
+
+ execbuf.buffers_ptr = to_user_pointer(obj);
+
+ execbuf.buffer_count = 2;
+ execbuf.flags = exec_id | I915_EXEC_FENCE_OUT ;
+
+ /* obj[0] is the relocation target, obj[1] the freshly created batch. */
+ obj[0].handle = target;
+ obj[1].handle = gem_create(fd, 4096);
+
+ obj[1].relocation_count = 1;
+ obj[1].relocs_ptr = to_user_pointer(&reloc);
+
+ reloc.target_handle = obj[0].handle;
+ reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
+ reloc.write_domain = I915_GEM_DOMAIN_COMMAND;
+ reloc.delta = offset * sizeof(uint32_t);
+
+ reloc.offset = i * sizeof(uint32_t);
+ gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
+
+ __sync_synchronize();
+
+ /* Caller only wants the batch object: hand it over without submitting. */
+ if (handle) {
+ *handle = obj[1].handle;
+ return;
+ }
+
+ /* NOTE(review): the batch has not been submitted yet at this point. */
+ gem_sync(fd, obj[1].handle);
+ execbuf.rsvd1 = ctx_id;
+ execbuf.rsvd2 = -1;
+
+ /* Keep the target busy with a spinner while the batch is queued. */
+ spin = igt_spin_batch_new(fd, .dependency = obj[0].handle);
+ igt_spin_batch_set_timeout(spin, timeout);
+ igt_assert(gem_bo_busy(fd, obj[0].handle));
+
+ gem_execbuf_wr(fd, &execbuf);
+ igt_spin_batch_free(fd, spin);
+
+ /* The out-fence fd is taken from the upper 32 bits of rsvd2. */
+ fence[fence_index] = execbuf.rsvd2 >> 32;
+
+ gem_close(fd, obj[1].handle);
+ gem_quiescent_gpu(fd);
+}
+
+/*
+ * create_ctx_with_priority:
+ * @fd: open DRM file descriptor
+ * @ctx_prio: HIGH, LOW, or any other value to leave the priority untouched
+ *
+ * Creates a new context and, for HIGH or LOW, requests MAX_PRIO or MIN_PRIO
+ * on it. The result of the priority request is not checked, so the context
+ * is returned even if the request was rejected.
+ *
+ * Returns: the id of the newly created context.
+ */
+static uint32_t create_ctx_with_priority(int fd, int ctx_prio)
+{
+	uint32_t ctx = gem_context_create(fd);
+
+	switch (ctx_prio) {
+	case HIGH:
+		__gem_context_set_priority(fd, ctx, MAX_PRIO);
+		igt_info("Setting MAX priority %d\n", ctx_prio);
+		break;
+	case LOW:
+		__gem_context_set_priority(fd, ctx, MIN_PRIO);
+		igt_info("Setting MIN priority %d\n", ctx_prio);
+		break;
+	default:
+		igt_info("Ignoring context priority %d\n", ctx_prio);
+		break;
+	}
+
+	/* Logged through igt_info like the messages above, not raw printf. */
+	igt_info("ctx id: %u\n", ctx);
+
+	return ctx;
+}
+
+/*
+ * inject_hang:
+ * @fd: open DRM file descriptor
+ * @ring: execbuf ring selector the hang is injected on
+ * @ctx_id: context used for the hanging batch
+ * @flags: flags forwarded to igt_hang_ctx()
+ *
+ * Injects a hang with igt_hang_ctx(), waits for the hanging batch to
+ * complete, then hands the hang descriptor to igt_post_hang_ring() so the
+ * resources taken by igt_hang_ctx() are released again.
+ */
+static void inject_hang(uint32_t fd, unsigned ring, uint32_t ctx_id, unsigned flags)
+{
+	igt_hang_t hang = igt_hang_ctx(fd, ctx_id, ring, flags);
+
+	gem_sync(fd, hang.spin->handle);
+
+	/* Pair every igt_hang_ctx() with its post-processing counterpart. */
+	igt_post_hang_ring(fd, hang);
+}
+
+/*
+ * gpu_watchdog_long_batch_2_contexts:
+ * @fd: open DRM file descriptor
+ * @nengine: number of physical engines; the test is skipped when it is 0
+ * @prio_ctx1: priority selector for the context submitting to the render engine
+ * @prio_ctx2: priority selector for the context submitting to the vebox engine
+ *
+ * Submits a long batch on the render engine from a first context and one on
+ * the video enhancement engine from a second context. The watchdog is always
+ * armed and a hang injected on the second context; on the first context this
+ * only happens when both priority selectors are negative.
+ */
+static void gpu_watchdog_long_batch_2_contexts(int fd, int nengine, int prio_ctx1, int prio_ctx2)
+{
+	uint32_t ctx[2];
+	uint32_t scratch[2];
+	/* Two fds only: stack storage, so there is nothing to allocate or leak. */
+	int fence[2] = { -1, -1 };
+	const unsigned flags = HANG_ALLOW_CAPTURE;
+	const uint64_t batch_timeout = timeout_100ms * 3;
+	int engine_id;
+	int i;
+
+	igt_require(nengine);
+
+	for (i = 0; i < 2; i++)
+		scratch[i] = gem_create(fd, 4096);
+
+	/* Create some work on RCS0 */
+	engine_id = I915_EXEC_RENDER;
+	ctx[0] = create_ctx_with_priority(fd, prio_ctx1);
+	batch_buffer_factory(fd, ctx[0], engine_id, scratch[0], 0, NULL, batch_timeout, fence, 0);
+
+	/* Cancel batch on RCS0 w/ gpu watchdog timeout */
+	if (prio_ctx1 < 0 && prio_ctx2 < 0) {
+		context_set_watchdog(fd, engine_id, ctx[0], WATCHDOG_THRESHOLD);
+		clear_error_state(fd);
+		inject_hang(fd, engine_id, ctx[0], flags);
+	}
+
+#if 0
+	/* Now check the engine was reset successfully */
+	igt_assert_eq(sync_fence_status(fence[0]), -EIO);
+#endif
+	close(fence[0]);
+
+	/* Create some work on VECS0 */
+	engine_id = I915_EXEC_VEBOX;
+	ctx[1] = create_ctx_with_priority(fd, prio_ctx2);
+	batch_buffer_factory(fd, ctx[1], engine_id, scratch[1], 0, NULL, batch_timeout, fence, 1);
+
+	/* Cancel batch on VECS0 w/ gpu watchdog timeout */
+	context_set_watchdog(fd, engine_id, ctx[1], WATCHDOG_THRESHOLD);
+	clear_error_state(fd);
+	inject_hang(fd, engine_id, ctx[1], flags);
+
+#if 0
+	/* Now check the engine was reset successfully */
+	igt_assert_eq(sync_fence_status(fence[1]), -EIO);
+#endif
+	close(fence[1]);
+
+	for (i = 0; i < 2; i++) {
+		gem_context_destroy(fd, ctx[i]);
+		gem_close(fd, scratch[i]);
+	}
+}
+
+/*
+ * gpu_watchodg_hang_long_batch_single_engine:
+ * @fd: open DRM file descriptor
+ * @engine_id: ring selector of the engine under test (currently unused; the
+ *             engine is matched by @name instead)
+ * @name: name of the engine on which the hang is injected
+ *
+ * For every physical engine except the blitter, creates a context and a
+ * scratch object, submits a long batch and arms the watchdog on that context.
+ * A hang is injected only on the engine whose name equals @name. All fences,
+ * contexts and scratch objects are released once every engine was visited.
+ */
+static void gpu_watchodg_hang_long_batch_single_engine(int fd, unsigned engine_id, const char *name)
+{
+	enum { MAX_BATCHES = 16 };
+	uint32_t ctx[MAX_BATCHES];
+	uint32_t scratch[MAX_BATCHES];
+	/* Stack storage: nothing to allocate, nothing to leak. */
+	int fence[MAX_BATCHES];
+	unsigned nengine = 0;
+	unsigned engine;
+	unsigned i;
+	const unsigned flags = HANG_ALLOW_CAPTURE;
+	const uint64_t batch_timeout = timeout_100ms * 4;
+
+	for_each_physical_engine(fd, engine) {
+		/* no support for gpu watchdog on BLT */
+		if (strncmp(e__->name, "blt", 3) == 0)
+			continue;
+
+		/* Never write past the per-engine bookkeeping arrays. */
+		igt_assert(nengine < MAX_BATCHES);
+
+		scratch[nengine] = gem_create(fd, 4096);
+		ctx[nengine] = create_ctx_with_priority(fd, -1);
+
+		/*
+		 * Create some work on the engine using the same ctx.
+		 * NOTE(review): e__->exec_id drops e__->flags, so engines that
+		 * differ only by flags are not told apart here - confirm
+		 * whether 'engine' should be passed instead.
+		 */
+		batch_buffer_factory(fd, ctx[nengine], e__->exec_id, scratch[nengine], 0, NULL, batch_timeout, fence, nengine);
+
+		/* Set the gpu watchdog timeout */
+		context_set_watchdog(fd, e__->exec_id, ctx[nengine], WATCHDOG_THRESHOLD);
+		clear_error_state(fd);
+
+		/* Cancel only the batch requested: match the full engine name */
+		if (strcmp(e__->name, name) == 0)
+			inject_hang(fd, e__->exec_id, ctx[nengine], flags);
+#if 0
+		igt_info("fence:%d, fence status : %d EIO: %d ctx_id:%d\n",fence[nengine], sync_fence_status(fence[nengine]),-EIO, ctx[nengine]);
+		/* Now check the engine was reset */
+		igt_assert_eq(sync_fence_status(fence[nengine]), -EIO);
+#endif
+		nengine++;
+	}
+
+	for (i = 0; i < nengine; i++) {
+		close(fence[i]);
+		gem_context_destroy(fd, ctx[i]);
+		gem_close(fd, scratch[i]);
+	}
+}
+
+/*
+ * Test entry point. Opens the Intel DRM device, requires at least one
+ * physical engine, then registers:
+ *  - a two-context subtest with a low and a high priority context,
+ *  - one "gpu-watchdog-long-batch-<engine>" subtest per entry of
+ *    intel_execution_engines, skipping the default entry and the blitter,
+ *  - a two-context subtest without explicit priorities.
+ */
+igt_main
+{
+	/* Initialised so the descriptor is never indeterminate outside the fixture. */
+	int fd = -1;
+	unsigned int nengine = 0;
+	unsigned int engine;
+
+	igt_skip_on_simulation();
+
+	igt_fixture {
+		fd = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(fd);
+
+		for_each_physical_engine(fd, engine)
+			nengine++;
+		igt_require(nengine);
+	}
+
+	igt_subtest_group {
+
+		igt_subtest_f("low-prio-ctx-wo-gpu-watchdog-and-high-prio-ctx-with-gpu-watchdog") {
+			int prio1 = LOW;
+			int prio2 = HIGH;
+			gpu_watchdog_long_batch_2_contexts(fd, nengine, prio1, prio2);
+		}
+
+		for (const struct intel_execution_engine *e = intel_execution_engines; e->name; e++) {
+			/* no support for gpu watchdog on BLT */
+			if (e->exec_id == 0 || e->exec_id == I915_EXEC_BLT)
+				continue;
+
+			igt_subtest_f("gpu-watchdog-long-batch-%s", e->name) {
+				igt_require(gem_ring_has_physical_engine(fd, e->exec_id | e->flags));
+				/* IGT logging instead of raw printf; exec_id is unsigned. */
+				igt_info("below id: %u\n", e->exec_id);
+				gpu_watchodg_hang_long_batch_single_engine(fd, e->exec_id | e->flags, e->name);
+			}
+		}
+
+		igt_subtest_f("gpu-watchdog-long-batch-2-contexts") {
+			/* Negative selectors: no explicit priority on either context. */
+			int prio = -1;
+			gpu_watchdog_long_batch_2_contexts(fd, nengine, prio, prio);
+		}
+	}
+
+	igt_fixture {
+		close(fd);
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index 5167a6c..b281b75 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -210,6 +210,7 @@ i915_progs = [
'gem_unref_active_buffers',
'gem_userptr_blits',
'gem_wait',
+ 'gem_watchdog',
'gem_workarounds',
'gem_write_read_ring_switch',
'i915_fb_tiling',
--
2.7.4
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
next reply other threads:[~2019-04-18 16:54 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-04-18 16:53 Carlos Santa [this message]
2019-04-18 17:39 ` [igt-dev] ✗ Fi.CI.BAT: failure for tests/gem_watchdog: Initial set of tests for GPU watchdog (rev4) Patchwork
2019-04-19 21:29 ` [igt-dev] [RFC v2] tests/gem_watchdog: Initial set of tests for GPU watchdog Antonio Argenziano
2019-04-23 0:04 ` Carlos Santa
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190418165337.23438-1-carlos.santa@intel.com \
--to=carlos.santa@intel.com \
--cc=igt-dev@lists.freedesktop.org \
--cc=tvrtko.ursulin@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox