From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Siluvery, Arun" Subject: Re: [RFC] drm/i915/bdw: Apply workarounds to the golden render state Date: Fri, 08 Aug 2014 14:11:02 +0100 Message-ID: <53E4CC66.2060901@linux.intel.com> References: <1407491577-31626-1-git-send-email-arun.siluvery@linux.intel.com> <1407491577-31626-2-git-send-email-arun.siluvery@linux.intel.com> <20140808122054.GF4193@intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1"; Format="flowed" Content-Transfer-Encoding: quoted-printable Return-path: Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by gabe.freedesktop.org (Postfix) with ESMTP id D8D936E86B for ; Fri, 8 Aug 2014 06:11:26 -0700 (PDT) In-Reply-To: <20140808122054.GF4193@intel.com> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" To: =?ISO-8859-1?Q?Ville_Syrj=E4l=E4?= Cc: intel-gfx@lists.freedesktop.org List-Id: intel-gfx@lists.freedesktop.org On 08/08/2014 13:20, Ville Syrjälä wrote: > On Fri, Aug 08, 2014 at 10:52:57AM +0100, arun.siluvery@linux.intel.com wrote: >> From: Arun Siluvery >> >> Workarounds for bdw are currently applied in init_clock_gating() but they >> are lost following a gpu reset. Some of the registers are part of register >> state context and they are restored with every context switch so initializing >> WAs in golden render state ensures that they are applied even when we start >> with an uninitialized context or during hw initialization followed by a reset. > > This approach might require separate null states for BDW vs. CHV and IVB > vs. HSW vs. VLV, which seems a bit unfortunate. Might be better to just > issue the w/a register writes via LRIs from the code as part of the null > state load. > Yes, this is a better approach; I am currently changing the code to achieve this, though I am not sure how easy it would be. 
> Although I don't actually understand how this improves things as opposed > to just applying the w/as via mmio writes. Does it? > I observed random behaviour with CACHE_MODE_1, which simply used to lose the applied workaround on the first context switch even though it is loaded with inhibit==1; register values are not supposed to change, but it was changing. I think it is better to add them in the null batch to ensure the hardware starts with the WAs applied. regards Arun >> >> Signed-off-by: Arun Siluvery >> --- >> drivers/gpu/drm/i915/intel_pm.c | 50 --------------------- >> drivers/gpu/drm/i915/intel_renderstate_gen8.c | 62 +++++++++++++++++---------- >> 2 files changed, 39 insertions(+), 73 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c >> index 1ddd4df..ab64b64 100644 >> --- a/drivers/gpu/drm/i915/intel_pm.c >> +++ b/drivers/gpu/drm/i915/intel_pm.c >> @@ -5402,38 +5402,11 @@ static void gen8_init_clock_gating(struct drm_device *dev) >> /* FIXME(BDW): Check all the w/a, some might only apply to >> * pre-production hw. */ >> >> - /* WaDisablePartialInstShootdown:bdw */ >> - I915_WRITE(GEN8_ROW_CHICKEN, >> - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); >> - >> - /* WaDisableThreadStallDopClockGating:bdw */ >> - /* FIXME: Unclear whether we really need this on production bdw. 
*/ >> - I915_WRITE(GEN8_ROW_CHICKEN, >> - _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); >> - >> - /* >> - * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for >> - * pre-production hardware >> - */ >> - I915_WRITE(HALF_SLICE_CHICKEN3, >> - _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS)); >> - I915_WRITE(HALF_SLICE_CHICKEN3, >> - _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); >> I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE)); >> >> I915_WRITE(_3D_CHICKEN3, >> _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2))); >> >> - I915_WRITE(COMMON_SLICE_CHICKEN2, >> - _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE)); >> - >> - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, >> - _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE)); >> - >> - /* WaDisableDopClockGating:bdw May not be needed for production */ >> - I915_WRITE(GEN7_ROW_CHICKEN2, >> - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); >> - >> /* WaSwitchSolVfFArbitrationPriority:bdw */ >> I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SO= L); >> >> @@ -5448,41 +5421,18 @@ static void gen8_init_clock_gating(struct drm_de= vice *dev) >> BDW_DPRS_MASK_VBLANK_SRD); >> } >> >> - /* Use Force Non-Coherent whenever executing a 3D context. This is a >> - * workaround for for a possible hang in the unlikely event a TLB >> - * invalidation occurs during a PSD flush. >> - */ >> - I915_WRITE(HDC_CHICKEN0, >> - I915_READ(HDC_CHICKEN0) | >> - _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT)); >> - >> /* WaVSRefCountFullforceMissDisable:bdw */ >> /* WaDSRefCountFullforceMissDisable:bdw */ >> I915_WRITE(GEN7_FF_THREAD_MODE, >> I915_READ(GEN7_FF_THREAD_MODE) & >> ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); >> >> - /* >> - * BSpec recommends 8x4 when MSAA is used, >> - * however in practice 16x4 seems fastest. 
>> - * >> - * Note that PS/WM thread counts depend on the WIZ hashing >> - * disable bit, which we don't touch here, but it's good >> - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). >> - */ >> - I915_WRITE(GEN7_GT_MODE, >> - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); >> - >> I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, >> _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); >> >> /* WaDisableSDEUnitClockGating:bdw */ >> I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | >> GEN8_SDEUNIT_CLOCK_GATE_DISABLE); >> - >> - /* Wa4x4STCOptimizationDisable:bdw */ >> - I915_WRITE(CACHE_MODE_1, >> - _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)); >> } >> >> static void haswell_init_clock_gating(struct drm_device *dev) >> diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen8.c b/drivers/gpu= /drm/i915/intel_renderstate_gen8.c >> index 75ef1b5..0b26783 100644 >> --- a/drivers/gpu/drm/i915/intel_renderstate_gen8.c >> +++ b/drivers/gpu/drm/i915/intel_renderstate_gen8.c >> @@ -1,14 +1,38 @@ >> #include "intel_renderstate.h" >> >> static const u32 gen8_null_state_relocs[] =3D { >> - 0x00000048, >> - 0x00000050, >> - 0x00000060, >> - 0x000003ec, >> + 0x000000a8, >> + 0x000000b0, >> + 0x000000c0, >> + 0x0000044c, >> -1, >> }; >> >> static const u32 gen8_null_state_batch[] =3D { >> + 0x11000001, >> + 0x0000e4f0, >> + 0x83208320, >> + 0x11000001, >> + 0x0000e4f4, >> + 0x00010001, >> + 0x11000001, >> + 0x0000e184, >> + 0x01020102, >> + 0x11000001, >> + 0x0000e100, >> + 0x04000400, >> + 0x11000001, >> + 0x00007014, >> + 0x00010001, >> + 0x11000001, >> + 0x00007300, >> + 0x00100010, >> + 0x11000001, >> + 0x00007004, >> + 0x00400040, >> + 0x11000001, >> + 0x00007008, >> + 0x02800200, >> 0x69040000, >> 0x61020001, >> 0x00000000, >> @@ -40,9 +64,9 @@ static const u32 gen8_null_state_batch[] =3D { >> 0xfffff001, >> 0x00001001, >> 0x78230000, >> - 0x000006e0, >> + 0x00000720, >> 0x78210000, >> - 0x00000700, >> + 0x00000740, >> 0x78300000, >> 0x08010040, >> 0x78330000, >> @@ -52,9 
+76,9 @@ static const u32 gen8_null_state_batch[] =3D { >> 0x78320000, >> 0x08000000, >> 0x78240000, >> - 0x00000641, >> + 0x00000681, >> 0x780e0000, >> - 0x00000601, >> + 0x00000641, >> 0x780d0000, >> 0x00000000, >> 0x78180000, >> @@ -199,9 +223,9 @@ static const u32 gen8_null_state_batch[] =3D { >> 0x00000000, >> 0x00000000, >> 0x782a0000, >> - 0x00000480, >> + 0x000004c0, >> 0x782f0000, >> - 0x00000540, >> + 0x00000580, >> 0x78140000, >> 0x00000800, >> 0x78170009, >> @@ -216,7 +240,7 @@ static const u32 gen8_null_state_batch[] =3D { >> 0x00000000, >> 0x00000000, >> 0x7820000a, >> - 0x00000580, >> + 0x000005c0, >> 0x00000000, >> 0x08080000, >> 0x00000000, >> @@ -232,7 +256,7 @@ static const u32 gen8_null_state_batch[] =3D { >> 0x784f0000, >> 0x80000100, >> 0x780f0000, >> - 0x00000740, >> + 0x00000780, >> 0x78050006, >> 0x00000000, >> 0x00000000, >> @@ -260,7 +284,7 @@ static const u32 gen8_null_state_batch[] =3D { >> 0x00000000, >> 0x78080003, >> 0x00006000, >> - 0x000005e0, /* reloc */ >> + 0x00000620, /* reloc */ >> 0x00000000, >> 0x00000000, >> 0x78090005, >> @@ -289,16 +313,8 @@ static const u32 gen8_null_state_batch[] =3D { >> 0x00000000, >> 0x00000000, >> 0x00000000, >> - 0x00000000, >> - 0x00000000, >> - 0x00000000, >> - 0x00000000, >> - 0x00000000, >> - 0x00000000, >> - 0x00000000, >> - 0x00000000, >> - 0x000004c0, /* state start */ >> - 0x00000500, >> + 0x00000500, /* state start */ >> + 0x00000540, >> 0x00000000, >> 0x00000000, >> 0x00000000, >> -- >> 2.0.4 >> >> _______________________________________________ >> Intel-gfx mailing list >> Intel-gfx@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx >