From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Siluvery, Arun" Subject: Re: [RFC 2/2] igt/gem_workarounds: igt to test workaround registers Date: Fri, 08 Aug 2014 17:39:45 +0100 Message-ID: <53E4FD51.3010204@linux.intel.com> References: <1407491696-31727-1-git-send-email-arun.siluvery@linux.intel.com> <1407491696-31727-3-git-send-email-arun.siluvery@linux.intel.com> <20140808141215.GU8727@phenom.ffwll.local> Mime-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1"; Format="flowed" Content-Transfer-Encoding: quoted-printable Return-path: Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by gabe.freedesktop.org (Postfix) with ESMTP id 9C3806E888 for ; Fri, 8 Aug 2014 09:39:48 -0700 (PDT) In-Reply-To: <20140808141215.GU8727@phenom.ffwll.local> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" To: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org List-Id: intel-gfx@lists.freedesktop.org On 08/08/2014 15:12, Daniel Vetter wrote: > On Fri, Aug 08, 2014 at 10:54:56AM +0100, arun.siluvery@linux.intel.com w= rote: >> From: Arun Siluvery >> >> Some of the workarounds are lost followed by a gpu reset, suspend/resume; >> this patch adds a test which captures register state before and after >> the test scenario. >> >> This test currently verifies only bdw workarounds. >> >> Signed-off-by: Arun Siluvery > > Some comments below. 
> >> --- >> lib/intel_reg.h | 8 ++ >> tests/Makefile.sources | 1 + >> tests/gem_workarounds.c | 211 ++++++++++++++++++++++++++++++++++++++++= ++++++++ >> 3 files changed, 220 insertions(+) >> create mode 100644 tests/gem_workarounds.c >> >> diff --git a/lib/intel_reg.h b/lib/intel_reg.h >> index 86175bb..d015c36 100644 >> --- a/lib/intel_reg.h >> +++ b/lib/intel_reg.h >> @@ -3628,4 +3628,12 @@ typedef enum { >> #define GEN6_WIZ_HASHING_16x4 GEN6_WIZ_HASHING(1, 0) >> #define GEN6_WIZ_HASHING_MASK (GEN6_WIZ_HASHING(1, 1) << 16) >> >> +#define GAMTARBMODE 0x04a08 >> +#define _3D_CHICKEN3 0x02090 >> +#define GAM_ECOCHK 0x4090 >> +#define CHICKEN_PAR1_1 0x42080 >> +#define GEN7_FF_THREAD_MODE 0x20a0 >> +#define GEN6_RC_SLEEP_PSMI_CONTROL 0x2050 >> +#define GEN8_UCGCTL6 0x9430 >> + >> #endif /* _I810_REG_H */ >> diff --git a/tests/Makefile.sources b/tests/Makefile.sources >> index 0eb9369..a17acd1 100644 >> --- a/tests/Makefile.sources >> +++ b/tests/Makefile.sources >> @@ -134,6 +134,7 @@ TESTS_progs =3D \ >> gem_unfence_active_buffers \ >> gem_unref_active_buffers \ >> gem_wait_render_timeout \ >> + gem_workarounds \ >> gen3_mixed_blits \ >> gen3_render_linear_blits \ >> gen3_render_mixed_blits \ >> diff --git a/tests/gem_workarounds.c b/tests/gem_workarounds.c >> new file mode 100644 >> index 0000000..35d1aa7 >> --- /dev/null >> +++ b/tests/gem_workarounds.c >> @@ -0,0 +1,211 @@ >> +/* >> + * Copyright =A9 2014 Intel Corporation >> + * >> + * Permission is hereby granted, free of charge, to any person obtainin= g a >> + * copy of this software and associated documentation files (the "Softw= are"), >> + * to deal in the Software without restriction, including without limit= ation >> + * the rights to use, copy, modify, merge, publish, distribute, sublice= nse, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this 
permission notice (including the= next >> + * paragraph) shall be included in all copies or substantial portions o= f the >> + * Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPR= ESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL= ITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT S= HALL >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR= OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARIS= ING >> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER = DEALINGS >> + * IN THE SOFTWARE. >> + * >> + * Authors: >> + * Arun Siluvery >> + * >> + */ >> + >> +#define _GNU_SOURCE >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> + >> +#include "ioctl_wrappers.h" >> +#include "drmtest.h" >> +#include "igt_debugfs.h" >> +#include "igt_aux.h" >> +#include "intel_chipset.h" >> +#include "intel_io.h" >> + >> +int drm_fd; >> +static drm_intel_bufmgr *bufmgr; >> +struct intel_batchbuffer *batch; >> +uint32_t devid; >> + >> +enum operation { >> + GPU_RESET, >> + SUSPEND_RESUME, > > The suspend test doesn't seem to be wired up ... > > Also I think it would be worth having a module-reload version here too. > Suspend/Resume is not working; the device does not resume even after the timer has elapsed. Do we know whether suspend/resume works correctly on nightly? 
>> +}; >> + >> +struct workaround { >> + const char *reg_name; >> + uint32_t address; >> +}; >> + >> +static struct workaround bdw_workarounds[] =3D >> +{ >> + { "GEN8_ROW_CHICKEN", GEN8_ROW_CHICKEN }, >> + { "GEN7_ROW_CHICKEN2", GEN7_ROW_CHICKEN2 }, >> + { "HALF_SLICE_CHICKEN3", HALF_SLICE_CHICKEN3 }, >> + { "GEN7_HALF_SLICE_CHICKEN1", GEN7_HALF_SLICE_CHICKEN1 }, >> + { "COMMON_SLICE_CHICKEN2", COMMON_SLICE_CHICKEN2 }, >> + { "HDC_CHICKEN0", HDC_CHICKEN0 }, >> + { "GEN7_CACHE_MODE_1", GEN7_CACHE_MODE_1 }, >> + { "GEN7_GT_MODE", GEN7_GT_MODE }, >> + { "GAMTARBMODE", GAMTARBMODE }, >> + { "_3D_CHICKEN3", _3D_CHICKEN3 }, >> + { "GAM_ECOCHK", GAM_ECOCHK }, >> + { "CHICKEN_PAR1_1", CHICKEN_PAR1_1 }, >> + { "GEN7_FF_THREAD_MODE", GEN7_FF_THREAD_MODE }, >> + { "GEN6_RC_SLEEP_PSMI_CONTROL", GEN6_RC_SLEEP_PSMI_CONTROL }, >> + { "GEN8_UCGCTL6", GEN8_UCGCTL6 }, >> + { "NULL", 0xFFFF }, >> +}; > > Crazy idea I've just had to validate that the w/a table here is > up-to-date with the one in the kernel: > > - We create a special WA_REG macro in the kernel which we use to wrap all > registers used in workarounds at the specific use-site (i.e. not in the > header). So > > I915_WRITE(WA_REG(GEN8_ROW_CHICKEN), ....); > > - That macro then adds the register to a table which we can dump through > debugfs with a file called intel_wa_registers. This happens at runtime. > This is important since a static list over all platforms might include > registers which hang some platforms when we read them. > > - A special subtest in this test here compares the kernel-provided list > with the one supplied here and makes sure that all the w/a in the kernel > list are in the test list, too. Or we just ditch the test list here > completely, but that might not work for special cases where we only need > to check some masks ... > > Opinions on this? Would this help with maintaining this testcase and > ensuring that it is always up-to-date with the kernel w/a list? 
I really > want to make sure we get this right, there have been way too many cases where > w/a settings have been lost over resume, runtime pm, ctx switches ... > I will change the implementation to use this macro. So in this case, is the table updated before each use case (reset, suspend/resume, module reload, etc.)? Is it not sufficient to capture the state at the beginning? My understanding is that the w/a state should really stay the same, so we compare the current state (e.g. after reset) to the one captured at the beginning rather than to the state just before the reset. I think it is easier to maintain if we completely remove the workaround list from igt itself; based on the hardware, the macro can populate only those workarounds that are applicable. But you mentioned that this may not work for special cases; could you elaborate on these cases? regards Arun >> + >> +static void test_hang_gpu(void) >> +{ >> + int retry_count =3D 30; >> + enum stop_ring_flags flags; >> + struct drm_i915_gem_execbuffer2 execbuf; >> + struct drm_i915_gem_exec_object2 gem_exec; >> + uint32_t b[2] =3D {MI_BATCH_BUFFER_END}; >> + >> + igt_assert(retry_count); >> + igt_set_stop_rings(STOP_RING_DEFAULTS); >> + >> + memset(&gem_exec, 0, sizeof(gem_exec)); >> + gem_exec.handle =3D gem_create(drm_fd, 4096); >> + gem_write(drm_fd, gem_exec.handle, 0, b, sizeof(b)); >> + >> + memset(&execbuf, 0, sizeof(execbuf)); >> + execbuf.buffers_ptr =3D (uintptr_t)&gem_exec; >> + execbuf.buffer_count =3D 1; >> + execbuf.batch_len =3D sizeof(b); >> + >> + drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); >> + >> + while(retry_count--) { >> + flags =3D igt_get_stop_rings(); >> + if (flags =3D=3D 0) >> + break; >> + printf("gpu hang not yet cleared, retries left %d\n", retry_count); >> + sleep(1); >> + } >> + >> + flags =3D igt_get_stop_rings(); >> + if (flags) >> + igt_set_stop_rings(STOP_RING_NONE); >> +} >> + >> +static void test_suspend_resume(void) >> +{ >> + printf("Suspending the device ...\n"); >> + 
igt_system_suspend_autoresume(); >> +} >> + >> +static void capture_wa_state(struct workaround *wa_regs, int num_wa, >> + unsigned int *reg_values) >> +{ >> + int i; >> + >> + igt_assert(reg_values); >> + intel_register_access_init(intel_get_pci_device(), 0); >> + >> + for (i =3D 0; i < num_wa; ++i) >> + reg_values[i] =3D intel_register_read(wa_regs[i].address); >> + >> + intel_register_access_fini(); >> +} >> + >> +static void check_workarounds(struct workaround *wa, enum operation op) >> +{ >> + int i; >> + int num_wa =3D 0; >> + unsigned int *before; >> + unsigned int *after; >> + bool fail =3D false; >> + >> + while(wa[num_wa].address !=3D 0xFFFF) >> + num_wa++; >> + >> + igt_assert(num_wa); >> + >> + before =3D malloc(num_wa * sizeof(*before)); >> + memset(before, 0x00, num_wa * sizeof(*before)); >> + capture_wa_state(wa, num_wa, before); >> + >> + switch (op) { >> + case GPU_RESET: >> + test_hang_gpu(); >> + break; >> + >> + case SUSPEND_RESUME: >> + test_suspend_resume(); >> + break; >> + >> + default: >> + fail =3D true; >> + goto out; >> + } >> + >> + after =3D malloc(num_wa * sizeof(*after)); >> + memset(after, 0x00, num_wa * sizeof(*after)); >> + capture_wa_state(wa, num_wa, after); >> + >> + for (i =3D 0; i < num_wa; ++i) { >> + if (before[i] !=3D after[i]) { >> + fail =3D true; >> + printf("%s workaround failed, before: 0x%08X, after: 0x%08X\n", >> + wa[i].reg_name, before[i], after[i]); >> + } >> + } >> + >> + free(after); >> + >> +out: >> + free(before); >> + >> + igt_assert(fail =3D=3D false); >> +} >> + >> +int main(int argc, char **argv) >> +{ >> + igt_subtest_init(argc, argv); >> + >> + igt_fixture { >> + drm_fd =3D drm_open_any(); >> + >> + bufmgr =3D drm_intel_bufmgr_gem_init(drm_fd, 4096); >> + devid =3D intel_get_drm_devid(drm_fd); >> + batch =3D intel_batchbuffer_alloc(bufmgr, devid); >> + } >> + >> + igt_subtest("check-workaround-data-after-reset") { >> + if (IS_BROADWELL(devid)) > > The logic here should be switched around, or at least if 
you don't have a > w/a table for a given platform we should skip the test. And tbh for > anything gen8+ we should fail it so that someone knows there's still work > to do. >> + check_workarounds(&bdw_workarounds[0], GPU_RESET); > > A simple > > else > igt_skip_on("No w/a table found!\"); > > here should do the trick. > >> + } >> + >> + >> + close(drm_fd); >> + igt_exit(); >> +} >> -- >> 2.0.4 >> > >> _______________________________________________ >> Intel-gfx mailing list >> Intel-gfx@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx > >