* [igt-dev] [PATCH i-g-t 1/2] tests/gem_exec_await: Relax the busy spinner
@ 2018-11-19 15:22 Tvrtko Ursulin
2018-11-19 15:22 ` [igt-dev] [PATCH i-g-t 2/2] tests/gem_exec_await: Add a memory pressure subtest Tvrtko Ursulin
` (3 more replies)
0 siblings, 4 replies; 12+ messages in thread
From: Tvrtko Ursulin @ 2018-11-19 15:22 UTC (permalink / raw)
To: igt-dev; +Cc: Intel-gfx, Tvrtko Ursulin
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Add some nop instructions between recursive batch buffer start calls to
give the system some breathing room. Without these, especially when coupled
with memory pressure, false GPU hangs can be observed caused by the
inability of the chip to cope.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106680
---
tests/i915/gem_exec_await.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/tests/i915/gem_exec_await.c b/tests/i915/gem_exec_await.c
index 5cfeb8ec8bfa..3ea5b5903c6b 100644
--- a/tests/i915/gem_exec_await.c
+++ b/tests/i915/gem_exec_await.c
@@ -63,6 +63,7 @@ static void xchg_obj(void *array, unsigned i, unsigned j)
static void wide(int fd, int ring_size, int timeout, unsigned int flags)
{
const uint32_t bbe = MI_BATCH_BUFFER_END;
+ const unsigned int num_nops = 1000;
const int gen = intel_gen(intel_get_drm_devid(fd));
struct {
struct drm_i915_gem_exec_object2 *obj;
@@ -123,7 +124,7 @@ static void wide(int fd, int ring_size, int timeout, unsigned int flags)
exec[e].execbuf.buffer_count = 2;
exec[e].reloc.target_handle = 1; /* recurse */
- exec[e].reloc.offset = sizeof(uint32_t);
+ exec[e].reloc.offset = (1 + num_nops) * sizeof(uint32_t);
exec[e].reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
if (gen < 4)
exec[e].reloc.delta = 1;
@@ -162,7 +163,7 @@ static void wide(int fd, int ring_size, int timeout, unsigned int flags)
gem_set_domain(fd, exec[e].exec[1].handle,
I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
- i = 0;
+ i = num_nops;
exec[e].cmd[i] = MI_BATCH_BUFFER_START;
if (gen >= 8) {
exec[e].cmd[i] |= 1 << 8 | 1;
@@ -200,7 +201,7 @@ static void wide(int fd, int ring_size, int timeout, unsigned int flags)
count += nengine;
for (unsigned e = 0; e < nengine; e++)
- exec[e].cmd[0] = MI_BATCH_BUFFER_END;
+ exec[e].cmd[num_nops] = MI_BATCH_BUFFER_END;
__sync_synchronize();
}
--
2.19.1
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [igt-dev] [PATCH i-g-t 2/2] tests/gem_exec_await: Add a memory pressure subtest
2018-11-19 15:22 [igt-dev] [PATCH i-g-t 1/2] tests/gem_exec_await: Relax the busy spinner Tvrtko Ursulin
@ 2018-11-19 15:22 ` Tvrtko Ursulin
2018-11-19 15:36 ` Chris Wilson
2018-11-19 15:28 ` [igt-dev] [PATCH i-g-t 1/2] tests/gem_exec_await: Relax the busy spinner Chris Wilson
` (2 subsequent siblings)
3 siblings, 1 reply; 12+ messages in thread
From: Tvrtko Ursulin @ 2018-11-19 15:22 UTC (permalink / raw)
To: igt-dev; +Cc: Intel-gfx, Tvrtko Ursulin
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Memory pressure subtest attempts to provoke system overload which can
cause GPU hangs, especially when combined with spin batches which do
not allow for some nop instructions to provide relief.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
tests/i915/gem_exec_await.c | 107 ++++++++++++++++++++++++++++++++++++
1 file changed, 107 insertions(+)
diff --git a/tests/i915/gem_exec_await.c b/tests/i915/gem_exec_await.c
index 3ea5b5903c6b..ccb5159a6fe1 100644
--- a/tests/i915/gem_exec_await.c
+++ b/tests/i915/gem_exec_await.c
@@ -30,6 +30,11 @@
#include <sys/ioctl.h>
#include <sys/signal.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
@@ -227,6 +232,92 @@ static void wide(int fd, int ring_size, int timeout, unsigned int flags)
free(exec);
}
+struct thread {
+ pthread_t thread;
+ volatile bool done;
+};
+
+static unsigned long get_meminfo(const char *info, const char *tag)
+{
+ const char *str;
+ unsigned long val;
+
+ str = strstr(info, tag);
+ if (str && sscanf(str + strlen(tag), " %lu", &val) == 1)
+ return val >> 10;
+
+ igt_warn("Unrecognised /proc/meminfo field: '%s'\n", tag);
+ return 0;
+}
+
+static unsigned long get_avail_ram_mb(void)
+{
+ int fd;
+ int ret;
+ char buf[4096];
+ unsigned long ram;
+
+ fd = open("/proc/meminfo", O_RDONLY);
+ igt_assert_fd(fd);
+
+ ret = read(fd, buf, sizeof(buf));
+ igt_assert(ret >= 0);
+
+ close(fd);
+
+ ram = get_meminfo(buf, "MemAvailable:");
+ ram += get_meminfo(buf, "Buffers:");
+ ram += get_meminfo(buf, "Cached:");
+ ram += get_meminfo(buf, "SwapCached:");
+
+ return ram;
+}
+
+#define PAGE_SIZE 4096
+static void *mempressure(void *arg)
+{
+ struct thread *thread = arg;
+ const unsigned int sz_mb = 2;
+ const unsigned int sz = sz_mb << 20;
+ unsigned int n = 0, max = 0;
+ unsigned int blocks;
+ void **ptr = NULL;
+
+ while (!thread->done) {
+ unsigned long ram_mb = get_avail_ram_mb();
+
+ if (!ptr) {
+ blocks = ram_mb / sz_mb;
+ ptr = calloc(blocks, sizeof(void *));
+ igt_assert(ptr);
+ } else if (ram_mb < 384) {
+ blocks = max + 1;
+ }
+
+ if (ptr[n])
+ munmap(ptr[n], sz);
+
+ ptr[n] = mmap(NULL, sz, PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ assert(ptr[n] != MAP_FAILED);
+
+ madvise(ptr[n], sz, MADV_HUGEPAGE);
+
+ for (size_t page = 0; page < sz; page += PAGE_SIZE)
+ *(volatile uint32_t *)((unsigned char *)ptr[n] + page) =
+ 0;
+
+ if (n > max)
+ max = n;
+
+ n++;
+
+ if (n >= blocks)
+ n = 0;
+ }
+
+ return NULL;
+}
igt_main
{
int ring_size = 0;
@@ -255,6 +346,22 @@ igt_main
wide(device, ring_size, 20, CONTEXTS);
}
+ igt_subtest("wide-contexts-mempressure") {
+ struct thread thread = { };
+ int ret;
+
+ gem_require_contexts(device);
+
+ ret = pthread_create(&thread.thread, NULL, mempressure,
+ &thread);
+ igt_assert_eq(ret, 0);
+
+ wide(device, ring_size, 20, CONTEXTS);
+
+ thread.done = true;
+ pthread_join(thread.thread, NULL);
+ }
+
igt_fixture {
igt_stop_hang_detector();
close(device);
--
2.19.1
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [igt-dev] [PATCH i-g-t 1/2] tests/gem_exec_await: Relax the busy spinner
2018-11-19 15:22 [igt-dev] [PATCH i-g-t 1/2] tests/gem_exec_await: Relax the busy spinner Tvrtko Ursulin
2018-11-19 15:22 ` [igt-dev] [PATCH i-g-t 2/2] tests/gem_exec_await: Add a memory pressure subtest Tvrtko Ursulin
@ 2018-11-19 15:28 ` Chris Wilson
2018-11-19 15:33 ` Tvrtko Ursulin
2018-11-19 16:44 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2] " Patchwork
2018-11-19 21:52 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
3 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2018-11-19 15:28 UTC (permalink / raw)
To: Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx, Tvrtko Ursulin
Quoting Tvrtko Ursulin (2018-11-19 15:22:28)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Add some nop instructions between recursive batch buffer start calls to
> give system some breathing room. Without these, especially when coupled
> with memory pressure, false GPU hangs can be observed caused by the
> inability of the chip to cope.
Doesn't seem to be required. And the machines most susceptible to timer
errors due to busyspin have not shown the issue.
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [igt-dev] [PATCH i-g-t 1/2] tests/gem_exec_await: Relax the busy spinner
2018-11-19 15:28 ` [igt-dev] [PATCH i-g-t 1/2] tests/gem_exec_await: Relax the busy spinner Chris Wilson
@ 2018-11-19 15:33 ` Tvrtko Ursulin
2018-11-19 16:18 ` Chris Wilson
0 siblings, 1 reply; 12+ messages in thread
From: Tvrtko Ursulin @ 2018-11-19 15:33 UTC (permalink / raw)
To: Chris Wilson, igt-dev; +Cc: Intel-gfx, Tvrtko Ursulin
On 19/11/2018 15:28, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-11-19 15:22:28)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Add some nop instructions between recursive batch buffer start calls to
>> give system some breathing room. Without these, especially when coupled
>> with memory pressure, false GPU hangs can be observed caused by the
>> inability of the chip to cope.
>
> Doesn't seem to be required. And the machines most susceptible to timer
> errors due to busyspin have not show the issue.
With the memory pressure subtest, the second patch in this series, it
was make it or break it to have the nops. Without them it was GPU hangs
all around, and with them so far all clean.
Regards,
Tvrtko
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [igt-dev] [PATCH i-g-t 2/2] tests/gem_exec_await: Add a memory pressure subtest
2018-11-19 15:22 ` [igt-dev] [PATCH i-g-t 2/2] tests/gem_exec_await: Add a memory pressure subtest Tvrtko Ursulin
@ 2018-11-19 15:36 ` Chris Wilson
2018-11-19 15:54 ` Tvrtko Ursulin
0 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2018-11-19 15:36 UTC (permalink / raw)
To: Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx, Tvrtko Ursulin
Quoting Tvrtko Ursulin (2018-11-19 15:22:29)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Memory pressure subtest attempts to provoke system overload which can
> cause GPU hangs, especially when combined with spin batches which do
> not allow for some nop instructions to provide relief.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
> tests/i915/gem_exec_await.c | 107 ++++++++++++++++++++++++++++++++++++
> 1 file changed, 107 insertions(+)
>
> diff --git a/tests/i915/gem_exec_await.c b/tests/i915/gem_exec_await.c
> index 3ea5b5903c6b..ccb5159a6fe1 100644
> --- a/tests/i915/gem_exec_await.c
> +++ b/tests/i915/gem_exec_await.c
> @@ -30,6 +30,11 @@
>
> #include <sys/ioctl.h>
> #include <sys/signal.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +#include <pthread.h>
> +#include <sched.h>
>
> #define LOCAL_I915_EXEC_NO_RELOC (1<<11)
> #define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
> @@ -227,6 +232,92 @@ static void wide(int fd, int ring_size, int timeout, unsigned int flags)
> free(exec);
> }
>
> +struct thread {
> + pthread_t thread;
> + volatile bool done;
> +};
> +
> +static unsigned long get_avail_ram_mb(void)
intel_get_avail_ram_mb() ?
> +#define PAGE_SIZE 4096
> +static void *mempressure(void *arg)
> +{
> + struct thread *thread = arg;
> + const unsigned int sz_mb = 2;
> + const unsigned int sz = sz_mb << 20;
> + unsigned int n = 0, max = 0;
> + unsigned int blocks;
> + void **ptr = NULL;
> +
> + while (!thread->done) {
You can use READ_ONCE(thread->done) here for familiarity.
> + unsigned long ram_mb = get_avail_ram_mb();
> +
> + if (!ptr) {
> + blocks = ram_mb / sz_mb;
> + ptr = calloc(blocks, sizeof(void *));
> + igt_assert(ptr);
> + } else if (ram_mb < 384) {
> + blocks = max + 1;
> + }
> +
> + if (ptr[n])
> + munmap(ptr[n], sz);
> +
> + ptr[n] = mmap(NULL, sz, PROT_WRITE,
> + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> + assert(ptr[n] != MAP_FAILED);
> +
> + madvise(ptr[n], sz, MADV_HUGEPAGE);
> +
> + for (size_t page = 0; page < sz; page += PAGE_SIZE)
> + *(volatile uint32_t *)((unsigned char *)ptr[n] + page) =
> + 0;
> +
> + if (n > max)
> + max = n;
> +
> + n++;
> +
> + if (n >= blocks)
> + n = 0;
Another method would be to use mlock to force exhaustion.
However, as the supposition is that rcu is part of the underlying
mechanism if you fill the dentry cache we'll exercise both the shrinker
and RCU.
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [igt-dev] [PATCH i-g-t 2/2] tests/gem_exec_await: Add a memory pressure subtest
2018-11-19 15:36 ` Chris Wilson
@ 2018-11-19 15:54 ` Tvrtko Ursulin
2018-11-19 17:07 ` [Intel-gfx] " Chris Wilson
0 siblings, 1 reply; 12+ messages in thread
From: Tvrtko Ursulin @ 2018-11-19 15:54 UTC (permalink / raw)
To: Chris Wilson, igt-dev; +Cc: Intel-gfx, Tvrtko Ursulin
On 19/11/2018 15:36, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-11-19 15:22:29)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Memory pressure subtest attempts to provoke system overload which can
>> cause GPU hangs, especially when combined with spin batches which do
>> not allow for some nop instructions to provide relief.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>> tests/i915/gem_exec_await.c | 107 ++++++++++++++++++++++++++++++++++++
>> 1 file changed, 107 insertions(+)
>>
>> diff --git a/tests/i915/gem_exec_await.c b/tests/i915/gem_exec_await.c
>> index 3ea5b5903c6b..ccb5159a6fe1 100644
>> --- a/tests/i915/gem_exec_await.c
>> +++ b/tests/i915/gem_exec_await.c
>> @@ -30,6 +30,11 @@
>>
>> #include <sys/ioctl.h>
>> #include <sys/signal.h>
>> +#include <sys/types.h>
>> +#include <sys/stat.h>
>> +#include <fcntl.h>
>> +#include <pthread.h>
>> +#include <sched.h>
>>
>> #define LOCAL_I915_EXEC_NO_RELOC (1<<11)
>> #define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
>> @@ -227,6 +232,92 @@ static void wide(int fd, int ring_size, int timeout, unsigned int flags)
>> free(exec);
>> }
>>
>> +struct thread {
>> + pthread_t thread;
>> + volatile bool done;
>> +};
>> +
>> +static unsigned long get_avail_ram_mb(void)
>
> intel_get_avail_ram_mb() ?
I thought so but when things went slow I looked inside and concluded it
is not suitable.
>> +#define PAGE_SIZE 4096
>> +static void *mempressure(void *arg)
>> +{
>> + struct thread *thread = arg;
>> + const unsigned int sz_mb = 2;
>> + const unsigned int sz = sz_mb << 20;
>> + unsigned int n = 0, max = 0;
>> + unsigned int blocks;
>> + void **ptr = NULL;
>> +
>> + while (!thread->done) {
>
> You can use READ_ONCE(thread->done) here for familiarity.
Okay, didn't realize we copied it to IGT.
>> + unsigned long ram_mb = get_avail_ram_mb();
>> +
>> + if (!ptr) {
>> + blocks = ram_mb / sz_mb;
>> + ptr = calloc(blocks, sizeof(void *));
>> + igt_assert(ptr);
>> + } else if (ram_mb < 384) {
>> + blocks = max + 1;
>> + }
>> +
>> + if (ptr[n])
>> + munmap(ptr[n], sz);
>> +
>> + ptr[n] = mmap(NULL, sz, PROT_WRITE,
>> + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
>> + assert(ptr[n] != MAP_FAILED);
>> +
>> + madvise(ptr[n], sz, MADV_HUGEPAGE);
>> +
>> + for (size_t page = 0; page < sz; page += PAGE_SIZE)
>> + *(volatile uint32_t *)((unsigned char *)ptr[n] + page) =
>> + 0;
>> +
>> + if (n > max)
>> + max = n;
>> +
>> + n++;
>> +
>> + if (n >= blocks)
>> + n = 0;
>
> Another method would be to use mlock to force exhaustion.
>
> However, as the supposition is that rcu is part of the underlying
> mechanism if you fill the dentry cache we'll exercise both the shrinker
> and RCU.
As said in previous reply, in my testing, well at least the one thing I
was able to reproduce and which has the same symptoms as the bug, the
problem went away with the addition of nops.
But yeah, maybe that could be an indirect effect.
Also this cleaned up patch does not cut it any longer. :( It seems I've
lost the magic ingredient to reproduce the stalls during cleanups. I
have to go back and add stuff to get it back.
Regards,
Tvrtko
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [igt-dev] [PATCH i-g-t 1/2] tests/gem_exec_await: Relax the busy spinner
2018-11-19 15:33 ` Tvrtko Ursulin
@ 2018-11-19 16:18 ` Chris Wilson
2018-11-19 19:18 ` Tvrtko Ursulin
0 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2018-11-19 16:18 UTC (permalink / raw)
To: Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx, Tvrtko Ursulin
Quoting Tvrtko Ursulin (2018-11-19 15:33:56)
>
> On 19/11/2018 15:28, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-11-19 15:22:28)
> >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >> Add some nop instructions between recursive batch buffer start calls to
> >> give system some breathing room. Without these, especially when coupled
> >> with memory pressure, false GPU hangs can be observed caused by the
> >> inability of the chip to cope.
> >
> > Doesn't seem to be required. And the machines most susceptible to timer
> > errors due to busyspin have not show the issue.
>
> With the memory pressure subtest, the second patch in this series, it
> was make it or break it to have the nops. Without them it was GPU hangs
> all around, and with them so far all clean.
First machine bsw, just applying patch 2/2,
IGT-Version: 1.23-gb6b8d829 (x86_64) (Linux: 4.20.0-rc2+ x86_64)
Using Execlists submission
Ring size: 131 batches
Starting subtest: wide-all
wide: 420 cycles: 24121.034us
Subtest wide-all: SUCCESS (47.060s)
Starting subtest: wide-contexts
wide: 340 cycles: 24893.896us
Subtest wide-contexts: SUCCESS (22.265s)
Starting subtest: wide-contexts-mempressure
wide: 232 cycles: 25153.899us
Subtest wide-contexts-mempressure: SUCCESS (23.141s)
:|
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply [flat|nested] 12+ messages in thread
* [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2] tests/gem_exec_await: Relax the busy spinner
2018-11-19 15:22 [igt-dev] [PATCH i-g-t 1/2] tests/gem_exec_await: Relax the busy spinner Tvrtko Ursulin
2018-11-19 15:22 ` [igt-dev] [PATCH i-g-t 2/2] tests/gem_exec_await: Add a memory pressure subtest Tvrtko Ursulin
2018-11-19 15:28 ` [igt-dev] [PATCH i-g-t 1/2] tests/gem_exec_await: Relax the busy spinner Chris Wilson
@ 2018-11-19 16:44 ` Patchwork
2018-11-19 21:52 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
3 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2018-11-19 16:44 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: igt-dev
== Series Details ==
Series: series starting with [i-g-t,1/2] tests/gem_exec_await: Relax the busy spinner
URL : https://patchwork.freedesktop.org/series/52695/
State : success
== Summary ==
= CI Bug Log - changes from IGT_4720 -> IGTPW_2077 =
== Summary - SUCCESS ==
No regressions found.
External URL: https://patchwork.freedesktop.org/api/1.0/series/52695/revisions/1/mbox/
== Possible new issues ==
Here are the unknown changes that may have been introduced in IGTPW_2077:
=== IGT changes ===
==== Warnings ====
igt@kms_busy@basic-flip-a:
{fi-kbl-7567u}: PASS -> SKIP +2
igt@kms_busy@basic-flip-c:
{fi-kbl-7500u}: PASS -> SKIP +2
== Known issues ==
Here are the changes found in IGTPW_2077 that come from known issues:
=== IGT changes ===
==== Issues hit ====
igt@gem_ctx_create@basic-files:
fi-icl-u2: PASS -> DMESG-WARN (fdo#107724)
igt@gem_exec_suspend@basic-s3:
fi-blb-e6850: PASS -> INCOMPLETE (fdo#107718)
igt@i915_selftest@live_contexts:
fi-bsw-kefka: PASS -> DMESG-FAIL (fdo#108656)
igt@i915_selftest@live_hangcheck:
fi-bwr-2160: PASS -> DMESG-FAIL (fdo#108735)
igt@kms_chamelium@dp-hpd-fast:
{fi-kbl-7500u}: PASS -> DMESG-WARN (fdo#103558, fdo#102505, fdo#105602)
igt@kms_frontbuffer_tracking@basic:
fi-hsw-peppy: PASS -> DMESG-WARN (fdo#102614)
igt@kms_pipe_crc_basic@nonblocking-crc-pipe-a:
fi-byt-clapper: PASS -> FAIL (fdo#107362)
igt@kms_pipe_crc_basic@read-crc-pipe-b-frame-sequence:
fi-byt-clapper: PASS -> FAIL (fdo#103191, fdo#107362) +1
igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
fi-icl-u: PASS -> INCOMPLETE (fdo#107713)
==== Possible fixes ====
igt@gem_exec_suspend@basic-s3:
fi-icl-u2: DMESG-WARN (fdo#107724) -> PASS
igt@i915_selftest@live_sanitycheck:
fi-gdg-551: INCOMPLETE (fdo#108789) -> PASS
{name}: This element is suppressed. This means it is ignored when computing
the status of the difference (SUCCESS, WARNING, or FAILURE).
fdo#102505 https://bugs.freedesktop.org/show_bug.cgi?id=102505
fdo#102614 https://bugs.freedesktop.org/show_bug.cgi?id=102614
fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191
fdo#103558 https://bugs.freedesktop.org/show_bug.cgi?id=103558
fdo#105602 https://bugs.freedesktop.org/show_bug.cgi?id=105602
fdo#107362 https://bugs.freedesktop.org/show_bug.cgi?id=107362
fdo#107713 https://bugs.freedesktop.org/show_bug.cgi?id=107713
fdo#107718 https://bugs.freedesktop.org/show_bug.cgi?id=107718
fdo#107724 https://bugs.freedesktop.org/show_bug.cgi?id=107724
fdo#108656 https://bugs.freedesktop.org/show_bug.cgi?id=108656
fdo#108735 https://bugs.freedesktop.org/show_bug.cgi?id=108735
fdo#108789 https://bugs.freedesktop.org/show_bug.cgi?id=108789
== Participating hosts (53 -> 47) ==
Missing (6): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600
== Build changes ==
* IGT: IGT_4720 -> IGTPW_2077
CI_DRM_5159: af98442486c4eeed23ed036dfa2b556def4203bd @ git://anongit.freedesktop.org/gfx-ci/linux
IGTPW_2077: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2077/
IGT_4720: c27aaca295d3ca2a38521e571c012449371e4bb5 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
== Testlist changes ==
+igt@gem_exec_await@wide-contexts-mempressure
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2077/issues.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [Intel-gfx] [igt-dev] [PATCH i-g-t 2/2] tests/gem_exec_await: Add a memory pressure subtest
2018-11-19 15:54 ` Tvrtko Ursulin
@ 2018-11-19 17:07 ` Chris Wilson
0 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2018-11-19 17:07 UTC (permalink / raw)
To: Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx
Quoting Tvrtko Ursulin (2018-11-19 15:54:44)
>
> On 19/11/2018 15:36, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-11-19 15:22:29)
> >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >> +static unsigned long get_avail_ram_mb(void)
> >
> > intel_get_avail_ram_mb() ?
>
> I thought so but when things went slow I looked inside and concluded it
> is not suitable.
Oh... That'll be the purge_vm_caches. We probably want to split it out.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [igt-dev] [PATCH i-g-t 1/2] tests/gem_exec_await: Relax the busy spinner
2018-11-19 16:18 ` Chris Wilson
@ 2018-11-19 19:18 ` Tvrtko Ursulin
2018-11-19 19:34 ` Chris Wilson
0 siblings, 1 reply; 12+ messages in thread
From: Tvrtko Ursulin @ 2018-11-19 19:18 UTC (permalink / raw)
To: Chris Wilson, igt-dev; +Cc: Intel-gfx, Tvrtko Ursulin
On 19/11/2018 16:18, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-11-19 15:33:56)
>>
>> On 19/11/2018 15:28, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2018-11-19 15:22:28)
>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>
>>>> Add some nop instructions between recursive batch buffer start calls to
>>>> give system some breathing room. Without these, especially when coupled
>>>> with memory pressure, false GPU hangs can be observed caused by the
>>>> inability of the chip to cope.
>>>
>>> Doesn't seem to be required. And the machines most susceptible to timer
>>> errors due to busyspin have not show the issue.
>>
>> With the memory pressure subtest, the second patch in this series, it
>> was make it or break it to have the nops. Without them it was GPU hangs
>> all around, and with them so far all clean.
>
> First machine bsw, just applying patch 2/2,
>
> IGT-Version: 1.23-gb6b8d829 (x86_64) (Linux: 4.20.0-rc2+ x86_64)
> Using Execlists submission
> Ring size: 131 batches
> Starting subtest: wide-all
> wide: 420 cycles: 24121.034us
> Subtest wide-all: SUCCESS (47.060s)
> Starting subtest: wide-contexts
> wide: 340 cycles: 24893.896us
> Subtest wide-contexts: SUCCESS (22.265s)
> Starting subtest: wide-contexts-mempressure
> wide: 232 cycles: 25153.899us
> Subtest wide-contexts-mempressure: SUCCESS (23.141s)
>
> :|
Yes, I think what I had before I cleaned up the test case was more
copy&paste of the memory pressure thread from gem_syslatency - including
the rtprio and multithreadedness. So I was possibly starving the
tasklets and who knows what not, as well as applying memory pressure.
However, the fact still is that adding nops to the spinner made even that monster
pass repeatedly. I'll play with it more tomorrow.
Regards,
Tvrtko
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [igt-dev] [PATCH i-g-t 1/2] tests/gem_exec_await: Relax the busy spinner
2018-11-19 19:18 ` Tvrtko Ursulin
@ 2018-11-19 19:34 ` Chris Wilson
0 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2018-11-19 19:34 UTC (permalink / raw)
To: Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx, Tvrtko Ursulin
Quoting Tvrtko Ursulin (2018-11-19 19:18:52)
>
> On 19/11/2018 16:18, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-11-19 15:33:56)
> >>
> >> On 19/11/2018 15:28, Chris Wilson wrote:
> >>> Quoting Tvrtko Ursulin (2018-11-19 15:22:28)
> >>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>>>
> >>>> Add some nop instructions between recursive batch buffer start calls to
> >>>> give system some breathing room. Without these, especially when coupled
> >>>> with memory pressure, false GPU hangs can be observed caused by the
> >>>> inability of the chip to cope.
> >>>
> >>> Doesn't seem to be required. And the machines most susceptible to timer
> >>> errors due to busyspin have not show the issue.
> >>
> >> With the memory pressure subtest, the second patch in this series, it
> >> was make it or break it to have the nops. Without them it was GPU hangs
> >> all around, and with them so far all clean.
> >
> > First machine bsw, just applying patch 2/2,
> >
> > IGT-Version: 1.23-gb6b8d829 (x86_64) (Linux: 4.20.0-rc2+ x86_64)
> > Using Execlists submission
> > Ring size: 131 batches
> > Starting subtest: wide-all
> > wide: 420 cycles: 24121.034us
> > Subtest wide-all: SUCCESS (47.060s)
> > Starting subtest: wide-contexts
> > wide: 340 cycles: 24893.896us
> > Subtest wide-contexts: SUCCESS (22.265s)
> > Starting subtest: wide-contexts-mempressure
> > wide: 232 cycles: 25153.899us
> > Subtest wide-contexts-mempressure: SUCCESS (23.141s)
> >
> > :|
>
> Yes, I think what I had before I cleaned up the test case was more
> copy&paste of the memory pressure thread from gem_syslatency - including
> the rtprio and multithreadedness. So I was possibly starving the
> tasklets and who knows what not, as well as applying memory pressure.
> However, fact still is adding nops to the spinner made even that monster
> pass repeatedly. I'll play with it more tomorrow.
I am a bit nervous about using the noops to avoid the issue, as I
presume that there is a more realistic workload that could generate
similar system latencies, i.e. that there exists a pathological case
that users will hit for similar stalls (gem shrinker perchance).
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply [flat|nested] 12+ messages in thread
* [igt-dev] ✓ Fi.CI.IGT: success for series starting with [i-g-t,1/2] tests/gem_exec_await: Relax the busy spinner
2018-11-19 15:22 [igt-dev] [PATCH i-g-t 1/2] tests/gem_exec_await: Relax the busy spinner Tvrtko Ursulin
` (2 preceding siblings ...)
2018-11-19 16:44 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2] " Patchwork
@ 2018-11-19 21:52 ` Patchwork
3 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2018-11-19 21:52 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: igt-dev
== Series Details ==
Series: series starting with [i-g-t,1/2] tests/gem_exec_await: Relax the busy spinner
URL : https://patchwork.freedesktop.org/series/52695/
State : success
== Summary ==
= CI Bug Log - changes from IGT_4720_full -> IGTPW_2077_full =
== Summary - WARNING ==
Minor unknown changes coming with IGTPW_2077_full need to be verified
manually.
If you think the reported changes have nothing to do with the changes
introduced in IGTPW_2077_full, please notify your bug team to allow them
to document this new failure mode, which will reduce false positives in CI.
External URL: https://patchwork.freedesktop.org/api/1.0/series/52695/revisions/1/mbox/
== Possible new issues ==
Here are the unknown changes that may have been introduced in IGTPW_2077_full:
=== IGT changes ===
==== Warnings ====
igt@pm_rc6_residency@rc6-accuracy:
shard-kbl: SKIP -> PASS
== Known issues ==
Here are the changes found in IGTPW_2077_full that come from known issues:
=== IGT changes ===
==== Issues hit ====
igt@kms_busy@extended-pageflip-modeset-hang-oldfb-render-a:
shard-snb: NOTRUN -> DMESG-WARN (fdo#107956) +1
igt@kms_ccs@pipe-a-crc-sprite-planes-basic:
shard-glk: PASS -> FAIL (fdo#108145)
shard-kbl: PASS -> FAIL (fdo#108145, fdo#107725)
igt@kms_cursor_crc@cursor-256x256-sliding:
shard-glk: PASS -> FAIL (fdo#103232) +3
shard-kbl: PASS -> FAIL (fdo#103232)
igt@kms_cursor_crc@cursor-256x256-suspend:
shard-apl: PASS -> FAIL (fdo#103232, fdo#103191)
igt@kms_cursor_crc@cursor-256x85-onscreen:
shard-apl: PASS -> FAIL (fdo#103232) +2
igt@kms_flip@flip-vs-expired-vblank-interruptible:
shard-glk: PASS -> FAIL (fdo#105363, fdo#102887)
igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-mmap-gtt:
shard-apl: PASS -> FAIL (fdo#103167) +1
igt@kms_frontbuffer_tracking@fbc-2p-primscrn-cur-indfb-draw-mmap-cpu:
shard-glk: PASS -> FAIL (fdo#103167) +4
igt@kms_plane_alpha_blend@pipe-b-alpha-7efc:
shard-kbl: NOTRUN -> FAIL (fdo#108590, fdo#108145)
igt@kms_plane_alpha_blend@pipe-c-alpha-transparant-fb:
shard-kbl: NOTRUN -> FAIL (fdo#108145)
igt@kms_plane_multiple@atomic-pipe-b-tiling-yf:
shard-kbl: PASS -> FAIL (fdo#103166) +1
igt@kms_plane_multiple@atomic-pipe-c-tiling-y:
shard-glk: PASS -> FAIL (fdo#103166) +2
shard-apl: PASS -> FAIL (fdo#103166)
igt@kms_sysfs_edid_timing:
shard-kbl: NOTRUN -> FAIL (fdo#100047)
==== Possible fixes ====
igt@gem_eio@in-flight-1us:
shard-glk: FAIL (fdo#107799) -> PASS +1
igt@gem_eio@unwedge-stress:
shard-glk: FAIL -> PASS
igt@gem_ppgtt@blt-vs-render-ctx0:
shard-kbl: INCOMPLETE (fdo#103665, fdo#106887, fdo#106023) -> PASS
igt@kms_available_modes_crc@available_mode_test_crc:
shard-apl: FAIL (fdo#106641) -> PASS
igt@kms_chv_cursor_fail@pipe-c-128x128-bottom-edge:
shard-glk: DMESG-WARN (fdo#105763, fdo#106538) -> PASS +1
igt@kms_cursor_crc@cursor-64x21-onscreen:
shard-glk: FAIL (fdo#103232) -> PASS +2
igt@kms_cursor_crc@cursor-64x21-random:
shard-apl: FAIL (fdo#103232) -> PASS +1
igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-blt:
shard-apl: FAIL (fdo#103167) -> PASS +1
shard-kbl: FAIL (fdo#103167) -> PASS +1
igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-draw-mmap-cpu:
shard-glk: FAIL (fdo#103167) -> PASS +3
igt@kms_plane@plane-position-covered-pipe-a-planes:
shard-glk: FAIL (fdo#103166) -> PASS +2
igt@kms_plane_alpha_blend@pipe-a-constant-alpha-max:
shard-glk: FAIL (fdo#108145) -> PASS
shard-kbl: FAIL (fdo#108145) -> PASS
shard-apl: FAIL (fdo#108145) -> PASS
igt@kms_plane_multiple@atomic-pipe-b-tiling-y:
shard-apl: FAIL (fdo#103166) -> PASS
igt@kms_setmode@basic:
shard-apl: FAIL (fdo#99912) -> PASS
igt@perf@blocking:
shard-hsw: FAIL (fdo#102252) -> PASS
fdo#100047 https://bugs.freedesktop.org/show_bug.cgi?id=100047
fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252
fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
fdo#103166 https://bugs.freedesktop.org/show_bug.cgi?id=103166
fdo#103167 https://bugs.freedesktop.org/show_bug.cgi?id=103167
fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191
fdo#103232 https://bugs.freedesktop.org/show_bug.cgi?id=103232
fdo#103665 https://bugs.freedesktop.org/show_bug.cgi?id=103665
fdo#105363 https://bugs.freedesktop.org/show_bug.cgi?id=105363
fdo#105763 https://bugs.freedesktop.org/show_bug.cgi?id=105763
fdo#106023 https://bugs.freedesktop.org/show_bug.cgi?id=106023
fdo#106538 https://bugs.freedesktop.org/show_bug.cgi?id=106538
fdo#106641 https://bugs.freedesktop.org/show_bug.cgi?id=106641
fdo#106887 https://bugs.freedesktop.org/show_bug.cgi?id=106887
fdo#107725 https://bugs.freedesktop.org/show_bug.cgi?id=107725
fdo#107799 https://bugs.freedesktop.org/show_bug.cgi?id=107799
fdo#107956 https://bugs.freedesktop.org/show_bug.cgi?id=107956
fdo#108145 https://bugs.freedesktop.org/show_bug.cgi?id=108145
fdo#108590 https://bugs.freedesktop.org/show_bug.cgi?id=108590
fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912
== Participating hosts (7 -> 5) ==
Missing (2): shard-skl shard-iclb
== Build changes ==
* IGT: IGT_4720 -> IGTPW_2077
CI_DRM_5159: af98442486c4eeed23ed036dfa2b556def4203bd @ git://anongit.freedesktop.org/gfx-ci/linux
IGTPW_2077: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2077/
IGT_4720: c27aaca295d3ca2a38521e571c012449371e4bb5 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2077/shards.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2018-11-19 21:52 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-11-19 15:22 [igt-dev] [PATCH i-g-t 1/2] tests/gem_exec_await: Relax the busy spinner Tvrtko Ursulin
2018-11-19 15:22 ` [igt-dev] [PATCH i-g-t 2/2] tests/gem_exec_await: Add a memory pressure subtest Tvrtko Ursulin
2018-11-19 15:36 ` Chris Wilson
2018-11-19 15:54 ` Tvrtko Ursulin
2018-11-19 17:07 ` [Intel-gfx] " Chris Wilson
2018-11-19 15:28 ` [igt-dev] [PATCH i-g-t 1/2] tests/gem_exec_await: Relax the busy spinner Chris Wilson
2018-11-19 15:33 ` Tvrtko Ursulin
2018-11-19 16:18 ` Chris Wilson
2018-11-19 19:18 ` Tvrtko Ursulin
2018-11-19 19:34 ` Chris Wilson
2018-11-19 16:44 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2] " Patchwork
2018-11-19 21:52 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox