From: Peter Senna Tschudin <peter.senna@linux.intel.com>
To: Sobin Thomas <sobin.thomas@intel.com>,
igt-dev@lists.freedesktop.org, matthew.brost@intel.com
Subject: Re: [PATCH i-g-t 1/1] RFC tests/intel/xe_exec_reset: Filter expected timeout dmesg during reset tests
Date: Thu, 29 Jan 2026 10:29:26 +0100 [thread overview]
Message-ID: <7f8b8095a03416aa48b9caf68495c67f6cb74438.camel@linux.intel.com> (raw)
In-Reply-To: <20260123065238.48129-2-sobin.thomas@intel.com>
Hi Sobin,
Please see my comments below.
On Fri, 2026-01-23 at 06:52 +0000, Sobin Thomas wrote:
> During reset testing there are timedout messages with lrc seq no
> and seqno coming in CI Dmesg. These logs are causing CI warnings.
>
> Since we are intentionally causing the GPU reset, so timeout messages
> are expected behavior rather than actual test failures. These
> messages
> filtered by CI and incorrectly flagged as errors.
>
> This change adds ignore_timeout_dmesg() function that registers a
> regex
> pattern to filter out expected timeout-related dmesg messages:
> - "Timedout"
> - "timeout"
>
> The function is strategically called before operations that trigger
> resets to proactively filter expected messages:
> - GT reset operations (xe_force_gt_reset_async/sync)
> - Legacy test modes involving resets
> - Compute mode tests with GT reset flags
> - Thread-based reset testing scenarios
>
> This ensures cleaner test output by suppressing expected noise while
> preserving genuine error reporting for actual test failures.
>
> Signed-off-by: Sobin Thomas <sobin.thomas@intel.com>
> ---
> tests/intel/xe_exec_reset.c | 49 +++++++++++++++++++++++++++++++----
> --
> 1 file changed, 41 insertions(+), 8 deletions(-)
>
> diff --git a/tests/intel/xe_exec_reset.c
> b/tests/intel/xe_exec_reset.c
> index 7aaee31dd..19b2c96b9 100644
> --- a/tests/intel/xe_exec_reset.c
> +++ b/tests/intel/xe_exec_reset.c
> @@ -28,6 +28,17 @@
>
> #define SYNC_OBJ_SIGNALED (0x1 << 0)
> #define LEGACY_MODE_ADDR 0x1a0000
> +static void ignore_timeout_dmesg(void)
> +{
> + /*
> + * Timedout jobs are expected during reset testing,
> + * so ignore these in igt_runner.
> + */
> + static const char *store = "Timedout|timeout";
> +
> + igt_emit_ignore_dmesg_regex(store);
> +}
> +
This will cause igt to ignore all timeouts. Would it be a good idea to
make this more specific so that only the expected timeouts are ignored?
>
> /**
> * SUBTEST: spin
> @@ -73,6 +84,7 @@ static void test_spin(int fd, struct
> drm_xe_engine_class_instance *eci,
>
> sync[0].handle = syncobj_create(fd, 0);
> xe_vm_bind_async(fd, vm, 0, bo, 0, addr, bo_size, sync, 1);
> + ignore_timeout_dmesg();
>
> #define N_TIMES 4
> for (i = 0; i < N_TIMES; ++i) {
> @@ -260,8 +272,10 @@ test_balancer(int fd, int gt, int class, int
> n_exec_queues, int n_execs,
>
> }
>
> - if (flags & GT_RESET)
> + if (flags & GT_RESET) {
> + ignore_timeout_dmesg();
> xe_force_gt_reset_async(fd, gt);
> + }
>
> if (flags & CLOSE_FD) {
> if (flags & CLOSE_EXEC_QUEUES) {
> @@ -446,6 +460,7 @@ test_compute_mode(int fd, struct
> drm_xe_engine_class_instance *eci,
> }
>
> if (flags & GT_RESET) {
> + ignore_timeout_dmesg();
> xe_spin_wait_started(&data[0].spin);
> xe_force_gt_reset_sync(fd, eci->gt_id);
> }
> @@ -590,6 +605,7 @@ gt_reset(int fd, int n_threads, int n_sec)
>
> pthread_mutex_init(&mutex, 0);
> pthread_cond_init(&cond, 0);
> + ignore_timeout_dmesg();
>
> for (i = 0; i < n_threads; ++i) {
> threads[i].mutex = &mutex;
> @@ -650,6 +666,7 @@ gt_mocs_reset(int fd, int gt)
> igt_debugfs_dump(fd, path);
> igt_debugfs_read(fd, path, mocs_content_pre);
>
> + ignore_timeout_dmesg();
> xe_force_gt_reset_sync(fd, gt);
>
> igt_assert(igt_debugfs_exists(fd, path, O_RDONLY));
> @@ -683,6 +700,7 @@ static void *thread(void *data)
> pthread_cond_wait(t->cond, t->mutex);
> pthread_mutex_unlock(t->mutex);
>
> + ignore_timeout_dmesg();
> xe_legacy_test_mode(t->fd, t->hwe, t->n_exec_queue, t-
> >n_exec,
> t->flags, LEGACY_MODE_ADDR, false);
>
> @@ -739,6 +757,7 @@ static void threads(int fd, int n_exec_queues,
> int n_execs, unsigned int flags)
> pthread_mutex_init(&mutex, 0);
> pthread_cond_init(&cond, 0);
>
> + ignore_timeout_dmesg();
> xe_for_each_engine(fd, hwe) {
> if (hwe->gt_id && (flags & GT0))
> continue;
> @@ -797,12 +816,15 @@ int igt_main()
> test_spin(fd, hwe, SYNC_OBJ_SIGNALED);
>
> igt_subtest("cat-error")
> - xe_for_each_engine(fd, hwe)
> + xe_for_each_engine(fd, hwe) {
> + ignore_timeout_dmesg();
> xe_legacy_test_mode(fd, hwe, 2, 2,
> CAT_ERROR,
> LEGACY_MODE_ADDR,
> false);
> + }
>
> igt_subtest("cancel")
> xe_for_each_engine(fd, hwe) {
> + ignore_timeout_dmesg();
> xe_legacy_test_mode(fd, hwe, 1, 1, 0,
> LEGACY_MODE_ADDR,
> false);
> break;
> @@ -810,6 +832,7 @@ int igt_main()
>
> igt_subtest("cancel-preempt")
> xe_for_each_engine(fd, hwe) {
> + ignore_timeout_dmesg();
> xe_legacy_test_mode(fd, hwe, 1, 1, PREEMPT,
> LEGACY_MODE_ADDR,
> false);
> break;
> @@ -897,25 +920,33 @@ int igt_main()
> LONG_SPIN_REUSE_QUEUE);
>
> igt_subtest("gt-reset")
> - xe_for_each_engine(fd, hwe)
> + xe_for_each_engine(fd, hwe) {
> + ignore_timeout_dmesg();
> xe_legacy_test_mode(fd, hwe, 2, 2, GT_RESET,
> LEGACY_MODE_ADDR,
> false);
> + }
>
> igt_subtest("close-fd-no-exec")
> - xe_for_each_engine(fd, hwe)
> + xe_for_each_engine(fd, hwe) {
> + ignore_timeout_dmesg();
> xe_legacy_test_mode(-1, hwe, 16, 0,
> CLOSE_FD,
> LEGACY_MODE_ADDR,
> false);
> + }
>
> igt_subtest("close-fd")
> - xe_for_each_engine(fd, hwe)
> + xe_for_each_engine(fd, hwe) {
> + ignore_timeout_dmesg();
> xe_legacy_test_mode(-1, hwe, 16, 256,
> CLOSE_FD,
> LEGACY_MODE_ADDR,
> false);
> + }
>
> igt_subtest("close-execqueues-close-fd")
> - xe_for_each_engine(fd, hwe)
> + xe_for_each_engine(fd, hwe) {
> + ignore_timeout_dmesg();
> xe_legacy_test_mode(-1, hwe, 16, 256,
> CLOSE_FD |
> CLOSE_EXEC_QUEUES,
> LEGACY_MODE_ADDR,
> false);
> + }
>
> igt_subtest("cm-cat-error")
> xe_for_each_engine(fd, hwe)
> @@ -941,17 +972,19 @@ int igt_main()
> for (const struct section *s = sections; s->name; s++) {
> igt_subtest_f("%s-cat-error", s->name)
> xe_for_each_gt(fd, gt)
The changes after this point should either be in a separate patch or
simply be removed.
> - xe_for_each_engine_class(class)
> + xe_for_each_engine_class(class) {
> test_balancer(fd, gt, class,
> XE_MAX_ENGINE_INSTANCE + 1,
>
> XE_MAX_ENGINE_INSTANCE + 1,
> CAT_ERROR | s-
> >flags);
> + }
>
> igt_subtest_f("%s-gt-reset", s->name)
> xe_for_each_gt(fd, gt)
> - xe_for_each_engine_class(class)
> + xe_for_each_engine_class(class) {
> test_balancer(fd, gt, class,
> XE_MAX_ENGINE_INSTANCE + 1,
>
> XE_MAX_ENGINE_INSTANCE + 1,
> GT_RESET | s-
> >flags);
> + }
>
> igt_subtest_f("%s-close-fd-no-exec", s->name)
> xe_for_each_gt(fd, gt)
next prev parent reply other threads:[~2026-01-29 9:29 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-23 6:52 [PATCH i-g-t 0/1] RFC tests/intel/xe_exec_reset: Filter expected timeout dmesg during reset tests Sobin Thomas
2026-01-23 6:52 ` [PATCH i-g-t 1/1] " Sobin Thomas
2026-01-29 9:29 ` Peter Senna Tschudin [this message]
2026-01-29 10:45 ` Kamil Konieczny
2026-01-23 8:02 ` ✓ Xe.CI.BAT: success for " Patchwork
2026-01-23 8:19 ` ✓ i915.CI.BAT: " Patchwork
2026-01-23 22:34 ` ✗ Xe.CI.Full: failure " Patchwork
2026-01-26 8:22 ` [PATCH i-g-t 0/1] " Zbigniew Kempczyński
2026-01-26 12:52 ` Thomas, Sobin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7f8b8095a03416aa48b9caf68495c67f6cb74438.camel@linux.intel.com \
--to=peter.senna@linux.intel.com \
--cc=igt-dev@lists.freedesktop.org \
--cc=matthew.brost@intel.com \
--cc=sobin.thomas@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox