From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Siluvery, Arun" Subject: Re: [RFC] drm/i915/bdw: Initialize BDW workarounds in render ring init fn Date: Mon, 28 Jul 2014 21:46:33 +0100 Message-ID: <53D6B6A9.3030102@linux.intel.com> References: <1406565106-27522-1-git-send-email-arun.siluvery@linux.intel.com> <1406565106-27522-2-git-send-email-arun.siluvery@linux.intel.com> <20140728170039.GN27580@intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1"; Format="flowed" Content-Transfer-Encoding: quoted-printable Return-path: Received: from mga01.intel.com (mga01.intel.com [192.55.52.88]) by gabe.freedesktop.org (Postfix) with ESMTP id 433F989C56 for ; Mon, 28 Jul 2014 13:46:56 -0700 (PDT) In-Reply-To: <20140728170039.GN27580@intel.com> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" To: =?ISO-8859-1?Q?Ville_Syrj=E4l=E4?= Cc: intel-gfx@lists.freedesktop.org List-Id: intel-gfx@lists.freedesktop.org On 28/07/2014 18:00, Ville Syrj=E4l=E4 wrote: > On Mon, Jul 28, 2014 at 05:31:46PM +0100, arun.siluvery@linux.intel.com w= rote: >> From: Arun Siluvery >> >> The workarounds at the moment are initialized in init_clock_gating() but >> they are lost during reset; In case of execlists some workarounds modify >> registers that are part of register state context, since these are not >> initialized until init_clock_gating() default context ends up with >> incorrect values as render context is restored and saved before updated >> by workarounds hence move them to render ring init fn. This should be >> ok as these workarounds are not related to display clock gating. >> >> Signed-off-by: Arun Siluvery >> --- >> drivers/gpu/drm/i915/i915_debugfs.c | 46 ++++++++++++++++++++++ >> drivers/gpu/drm/i915/intel_pm.c | 59 -------------------------= --- >> drivers/gpu/drm/i915/intel_ringbuffer.c | 68 +++++++++++++++++++++++++= ++++++++ >> 3 files changed, 114 insertions(+), 59 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/= i915_debugfs.c >> index 083683c..cf7da30 100644 >> --- a/drivers/gpu/drm/i915/i915_debugfs.c >> +++ b/drivers/gpu/drm/i915/i915_debugfs.c >> @@ -2397,20 +2397,65 @@ static int i915_shared_dplls_info(struct seq_fil= e *m, void *unused) >> seq_printf(m, " dpll_md: 0x%08x\n", pll->hw_state.dpll_md); >> seq_printf(m, " fp0: 0x%08x\n", pll->hw_state.fp0); >> seq_printf(m, " fp1: 0x%08x\n", pll->hw_state.fp1); >> seq_printf(m, " wrpll: 0x%08x\n", pll->hw_state.wrpll); >> } >> drm_modeset_unlock_all(dev); >> >> return 0; >> } >> >> +static int i915_workaround_info(struct seq_file *m, void *unused) >> +{ >> + struct drm_info_node *node =3D (struct drm_info_node *) m->private; >> + struct drm_device *dev =3D node->minor->dev; >> + struct drm_i915_private *dev_priv =3D dev->dev_private; >> + int ret; >> + >> + ret =3D mutex_lock_interruptible(&dev->struct_mutex); >> + if (ret) >> + return ret; >> + >> + if (IS_BROADWELL(dev)) { >> + seq_printf(m, "GEN8_ROW_CHICKEN:\t0x%08x\n", >> + I915_READ(GEN8_ROW_CHICKEN)); >> + seq_printf(m, "HALF_SLICE_CHICKEN3:\t0x%08x\n", >> + I915_READ(HALF_SLICE_CHICKEN3)); >> + seq_printf(m, "GAMTARBMODE:\t0x%08x\n", I915_READ(GAMTARBMODE)); >> + seq_printf(m, "_3D_CHICKEN3:\t0x%08x\n", >> + I915_READ(_3D_CHICKEN3)); >> + seq_printf(m, "COMMON_SLICE_CHICKEN2:\t0x%08x\n", >> + I915_READ(COMMON_SLICE_CHICKEN2)); >> + seq_printf(m, "GEN7_HALF_SLICE_CHICKEN1:\t0x%08x\n", >> + I915_READ(GEN7_HALF_SLICE_CHICKEN1)); >> + seq_printf(m, "GEN7_ROW_CHICKEN2:\t0x%08x\n", >> + I915_READ(GEN7_ROW_CHICKEN2)); >> + seq_printf(m, "GAM_ECOCHK:\t0x%08x\n", >> + I915_READ(GAM_ECOCHK)); >> + seq_printf(m, "HDC_CHICKEN0:\t0x%08x\n", >> + I915_READ(HDC_CHICKEN0)); >> + seq_printf(m, "GEN7_FF_THREAD_MODE:\t0x%08x\n", >> + I915_READ(GEN7_FF_THREAD_MODE)); >> + seq_printf(m, "GEN8_UCGCTL6:\t0x%08x\n", >> + I915_READ(GEN8_UCGCTL6)); >> + seq_printf(m, "GEN6_RC_SLEEP_PSMI_CONTROL:\t0x%08x\n", >> + I915_READ(GEN6_RC_SLEEP_PSMI_CONTROL)); >> + seq_printf(m, "CACHE_MODE_1:\t0x%08x\n", >> + I915_READ(CACHE_MODE_1)); >> + } else >> + DRM_DEBUG_DRIVER("Not available for Gen%d\n", >> + INTEL_INFO(dev)->gen); >> + >> + mutex_unlock(&dev->struct_mutex); >> + return 0; >> +} >> + > > This smells like a separate patch. But I'm not sure we want at all since > intel_reg_read will provide the same information. > >> struct pipe_crc_info { >> const char *name; >> struct drm_device *dev; >> enum pipe pipe; >> }; >> >> static int i915_pipe_crc_open(struct inode *inode, struct file *filep) >> { >> struct pipe_crc_info *info =3D inode->i_private; >> struct drm_i915_private *dev_priv =3D info->dev->dev_private; >> @@ -3904,20 +3949,21 @@ static const struct drm_info_list i915_debugfs_l= ist[] =3D { >> {"i915_ppgtt_info", i915_ppgtt_info, 0}, >> {"i915_llc", i915_llc, 0}, >> {"i915_edp_psr_status", i915_edp_psr_status, 0}, >> {"i915_sink_crc_eDP1", i915_sink_crc, 0}, >> {"i915_energy_uJ", i915_energy_uJ, 0}, >> {"i915_pc8_status", i915_pc8_status, 0}, >> {"i915_power_domain_info", i915_power_domain_info, 0}, >> {"i915_display_info", i915_display_info, 0}, >> {"i915_semaphore_status", i915_semaphore_status, 0}, >> {"i915_shared_dplls_info", i915_shared_dplls_info, 0}, >> + {"i915_workaround_info", i915_workaround_info, 0}, >> }; >> #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list) >> >> static const struct i915_debugfs_files { >> const char *name; >> const struct file_operations *fops; >> } i915_debugfs_files[] =3D { >> {"i915_wedged", &i915_wedged_fops}, >> {"i915_max_freq", &i915_max_freq_fops}, >> {"i915_min_freq", &i915_min_freq_fops}, >> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/inte= l_pm.c >> index 3f88f29..9597f95 100644 >> --- a/drivers/gpu/drm/i915/intel_pm.c >> +++ b/drivers/gpu/drm/i915/intel_pm.c >> @@ -5393,101 +5393,42 @@ static void gen8_init_clock_gating(struct drm_d= evice *dev) >> struct drm_i915_private *dev_priv =3D dev->dev_private; >> enum pipe pipe; >> >> I915_WRITE(WM3_LP_ILK, 0); >> I915_WRITE(WM2_LP_ILK, 0); >> I915_WRITE(WM1_LP_ILK, 0); >> >> /* FIXME(BDW): Check all the w/a, some might only apply to >> * pre-production hw. */ >> >> - /* WaDisablePartialInstShootdown:bdw */ >> - I915_WRITE(GEN8_ROW_CHICKEN, >> - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); >> - >> - /* WaDisableThreadStallDopClockGating:bdw */ >> - /* FIXME: Unclear whether we really need this on production bdw. */ >> - I915_WRITE(GEN8_ROW_CHICKEN, >> - _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); >> - >> - /* >> - * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for >> - * pre-production hardware >> - */ >> - I915_WRITE(HALF_SLICE_CHICKEN3, >> - _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS)); >> - I915_WRITE(HALF_SLICE_CHICKEN3, >> - _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); >> - I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE)); >> - >> - I915_WRITE(_3D_CHICKEN3, >> - _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2))); >> - >> - I915_WRITE(COMMON_SLICE_CHICKEN2, >> - _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE)); >> - >> - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, >> - _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE)); >> - >> - /* WaDisableDopClockGating:bdw May not be needed for production */ >> - I915_WRITE(GEN7_ROW_CHICKEN2, >> - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); >> - >> - /* WaSwitchSolVfFArbitrationPriority:bdw */ >> - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL= ); >> >> /* WaPsrDPAMaskVBlankInSRD:bdw */ >> I915_WRITE(CHICKEN_PAR1_1, >> I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD); >> >> /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ >> for_each_pipe(pipe) { >> I915_WRITE(CHICKEN_PIPESL_1(pipe), >> I915_READ(CHICKEN_PIPESL_1(pipe)) | >> BDW_DPRS_MASK_VBLANK_SRD); >> } >> >> - /* Use Force Non-Coherent whenever executing a 3D context. This is a >> - * workaround for for a possible hang in the unlikely event a TLB >> - * invalidation occurs during a PSD flush. >> - */ >> - I915_WRITE(HDC_CHICKEN0, >> - I915_READ(HDC_CHICKEN0) | >> - _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT)); >> - >> - /* WaVSRefCountFullforceMissDisable:bdw */ >> - /* WaDSRefCountFullforceMissDisable:bdw */ >> - I915_WRITE(GEN7_FF_THREAD_MODE, >> - I915_READ(GEN7_FF_THREAD_MODE) & >> - ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); >> - >> /* >> * BSpec recommends 8x4 when MSAA is used, >> * however in practice 16x4 seems fastest. >> * >> * Note that PS/WM thread counts depend on the WIZ hashing >> * disable bit, which we don't touch here, but it's good >> * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). >> */ >> I915_WRITE(GEN7_GT_MODE, >> GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); > > GT_MODE doesn't get clobbered by a reset? I would have expected it to > behave the same way as CACHE_MODE_{0,1). > >> - >> - I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, >> - _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); >> - >> - /* WaDisableSDEUnitClockGating:bdw */ >> - I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | >> - GEN8_SDEUNIT_CLOCK_GATE_DISABLE); > > And I would have expected the UCGCTL registers to not be clobbered. IIRC > they didn't get clobbered on my IVB, but my memory might be wrong here. I could complete testing because of the problem with CACHE_MODE_1, will = double check these registers and move/unmove them accordingly. > >> - >> - /* Wa4x4STCOptimizationDisable:bdw */ >> - I915_WRITE(CACHE_MODE_1, >> - _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)); >> } >> >> static void haswell_init_clock_gating(struct drm_device *dev) >> { >> struct drm_i915_private *dev_priv =3D dev->dev_private; >> >> ilk_init_lp_watermarks(dev); >> >> /* L3 caching of data atomics doesn't work -- disable it. */ >> I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); >> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i= 915/intel_ringbuffer.c >> index b3d8f76..6818e34 100644 >> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c >> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c >> @@ -591,20 +591,85 @@ init_pipe_control(struct intel_engine_cs *ring) >> return 0; >> >> err_unpin: >> i915_gem_object_ggtt_unpin(ring->scratch.obj); >> err_unref: >> drm_gem_object_unreference(&ring->scratch.obj->base); >> err: >> return ret; >> } >> >> +static void bdw_init_workarounds(struct drm_device *dev) >> +{ >> + struct drm_i915_private *dev_priv =3D dev->dev_private; >> + >> + /* WaDisablePartialInstShootdown:bdw */ >> + I915_WRITE(GEN8_ROW_CHICKEN, >> + _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); >> + >> + /* WaDisableThreadStallDopClockGating:bdw */ >> + /* FIXME: Unclear whether we really need these on production bdw. */ >> + I915_WRITE(GEN8_ROW_CHICKEN, >> + _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); >> + >> + /* >> + * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for >> + * pre-production hardware >> + */ >> + I915_WRITE(HALF_SLICE_CHICKEN3, >> + _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS)); >> + I915_WRITE(HALF_SLICE_CHICKEN3, >> + _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); >> + I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE)); >> + >> + I915_WRITE(_3D_CHICKEN3, >> + _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2))); >> + >> + I915_WRITE(COMMON_SLICE_CHICKEN2, >> + _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE)); >> + >> + I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, >> + _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE)); >> + >> + /* WaDisableDopClockGating:bdw May not be needed for production */ >> + I915_WRITE(GEN7_ROW_CHICKEN2, >> + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); >> + >> + /* WaSwitchSolVfFArbitrationPriority:bdw */ >> + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL= ); >> + >> + /* Use Force Non-Coherent whenever executing a 3D context. This is a >> + * workaround for for a possible hang in the unlikely event a TLB >> + * invalidation occurs during a PSD flush. >> + */ >> + I915_WRITE(HDC_CHICKEN0, >> + I915_READ(HDC_CHICKEN0) | > > You might kill this read since it's a masked register. Maybe as a > separate prep-patch. > >> + _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT)); >> + >> + /* WaVSRefCountFullforceMissDisable:bdw */ >> + /* WaDSRefCountFullforceMissDisable:bdw */ >> + I915_WRITE(GEN7_FF_THREAD_MODE, >> + I915_READ(GEN7_FF_THREAD_MODE) & >> + ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); >> + >> + /* WaDisableSDEUnitClockGating:bdw */ >> + I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | >> + GEN8_SDEUNIT_CLOCK_GATE_DISABLE); >> + >> + I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, >> + _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); >> + >> + /* Wa4x4STCOptimizationDisable:bdw */ >> + I915_WRITE(CACHE_MODE_1, >> + _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)); > > I think it would be nice to reorder this function a bit to keep similar r= egisters > close to each other. The current mismash makes it easy to overlook a > workaround and then we may end up with duplicates (wouldn't be the first > time that happened). > sure, I will take care of this in the next version. >> +} >> + >> static int init_render_ring(struct intel_engine_cs *ring) >> { >> struct drm_device *dev =3D ring->dev; >> struct drm_i915_private *dev_priv =3D dev->dev_private; >> int ret =3D init_ring_common(ring); >> if (ret) >> return ret; >> >> /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ >> if (INTEL_INFO(dev)->gen >=3D 4 && INTEL_INFO(dev)->gen < 7) >> @@ -646,20 +711,23 @@ static int init_render_ring(struct intel_engine_cs= *ring) >> I915_WRITE(CACHE_MODE_0, >> _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); >> } >> >> if (INTEL_INFO(dev)->gen >=3D 6) >> I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); >> >> if (HAS_L3_DPF(dev)) >> I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev)); >> >> + if (IS_BROADWELL(dev)) >> + bdw_init_workarounds(dev); >> + >> return ret; >> } >> >> static void render_ring_cleanup(struct intel_engine_cs *ring) >> { >> struct drm_device *dev =3D ring->dev; >> struct drm_i915_private *dev_priv =3D dev->dev_private; >> >> if (dev_priv->semaphore_obj) { >> i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj); >> -- >> 1.9.2 >> >> _______________________________________________ >> Intel-gfx mailing list >> Intel-gfx@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx >