All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Ville Syrjälä" <ville.syrjala@linux.intel.com>
To: arun.siluvery@linux.intel.com
Cc: intel-gfx@lists.freedesktop.org
Subject: Re: [RFC] drm/i915/bdw: Initialize BDW workarounds in render ring init fn
Date: Mon, 28 Jul 2014 20:00:39 +0300	[thread overview]
Message-ID: <20140728170039.GN27580@intel.com> (raw)
In-Reply-To: <1406565106-27522-2-git-send-email-arun.siluvery@linux.intel.com>

On Mon, Jul 28, 2014 at 05:31:46PM +0100, arun.siluvery@linux.intel.com wrote:
> From: Arun Siluvery <arun.siluvery@linux.intel.com>
> 
> The workarounds at the moment are initialized in init_clock_gating() but
> they are lost during reset; In case of execlists some workarounds modify
> registers that are part of register state context, since these are not
> initialized until init_clock_gating() default context ends up with
> incorrect values as render context is restored and saved before updated
> by workarounds hence move them to render ring init fn. This should be
> ok as these workarounds are not related to display clock gating.
> 
> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c     | 46 ++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_pm.c         | 59 ----------------------------
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 68 +++++++++++++++++++++++++++++++++
>  3 files changed, 114 insertions(+), 59 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 083683c..cf7da30 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2397,20 +2397,65 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused)
>  		seq_printf(m, " dpll_md: 0x%08x\n", pll->hw_state.dpll_md);
>  		seq_printf(m, " fp0:     0x%08x\n", pll->hw_state.fp0);
>  		seq_printf(m, " fp1:     0x%08x\n", pll->hw_state.fp1);
>  		seq_printf(m, " wrpll:   0x%08x\n", pll->hw_state.wrpll);
>  	}
>  	drm_modeset_unlock_all(dev);
>  
>  	return 0;
>  }
>  
> +static int i915_workaround_info(struct seq_file *m, void *unused)
> +{
> +	struct drm_info_node *node = (struct drm_info_node *) m->private;
> +	struct drm_device *dev = node->minor->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	int ret;
> +
> +	ret = mutex_lock_interruptible(&dev->struct_mutex);
> +	if (ret)
> +		return ret;
> +
> +	if (IS_BROADWELL(dev)) {
> +		seq_printf(m, "GEN8_ROW_CHICKEN:\t0x%08x\n",
> +			   I915_READ(GEN8_ROW_CHICKEN));
> +		seq_printf(m, "HALF_SLICE_CHICKEN3:\t0x%08x\n",
> +			   I915_READ(HALF_SLICE_CHICKEN3));
> +		seq_printf(m, "GAMTARBMODE:\t0x%08x\n", I915_READ(GAMTARBMODE));
> +		seq_printf(m, "_3D_CHICKEN3:\t0x%08x\n",
> +			   I915_READ(_3D_CHICKEN3));
> +		seq_printf(m, "COMMON_SLICE_CHICKEN2:\t0x%08x\n",
> +			   I915_READ(COMMON_SLICE_CHICKEN2));
> +		seq_printf(m, "GEN7_HALF_SLICE_CHICKEN1:\t0x%08x\n",
> +			   I915_READ(GEN7_HALF_SLICE_CHICKEN1));
> +		seq_printf(m, "GEN7_ROW_CHICKEN2:\t0x%08x\n",
> +			   I915_READ(GEN7_ROW_CHICKEN2));
> +		seq_printf(m, "GAM_ECOCHK:\t0x%08x\n",
> +			   I915_READ(GAM_ECOCHK));
> +		seq_printf(m, "HDC_CHICKEN0:\t0x%08x\n",
> +			   I915_READ(HDC_CHICKEN0));
> +		seq_printf(m, "GEN7_FF_THREAD_MODE:\t0x%08x\n",
> +			   I915_READ(GEN7_FF_THREAD_MODE));
> +		seq_printf(m, "GEN8_UCGCTL6:\t0x%08x\n",
> +			   I915_READ(GEN8_UCGCTL6));
> +		seq_printf(m, "GEN6_RC_SLEEP_PSMI_CONTROL:\t0x%08x\n",
> +			   I915_READ(GEN6_RC_SLEEP_PSMI_CONTROL));
> +		seq_printf(m, "CACHE_MODE_1:\t0x%08x\n",
> +			   I915_READ(CACHE_MODE_1));
> +	} else
> +		DRM_DEBUG_DRIVER("Not available for Gen%d\n",
> +				 INTEL_INFO(dev)->gen);
> +
> +	mutex_unlock(&dev->struct_mutex);
> +	return 0;
> +}
> +

This smells like a separate patch. But I'm not sure we want at all since
intel_reg_read will provide the same information.

>  struct pipe_crc_info {
>  	const char *name;
>  	struct drm_device *dev;
>  	enum pipe pipe;
>  };
>  
>  static int i915_pipe_crc_open(struct inode *inode, struct file *filep)
>  {
>  	struct pipe_crc_info *info = inode->i_private;
>  	struct drm_i915_private *dev_priv = info->dev->dev_private;
> @@ -3904,20 +3949,21 @@ static const struct drm_info_list i915_debugfs_list[] = {
>  	{"i915_ppgtt_info", i915_ppgtt_info, 0},
>  	{"i915_llc", i915_llc, 0},
>  	{"i915_edp_psr_status", i915_edp_psr_status, 0},
>  	{"i915_sink_crc_eDP1", i915_sink_crc, 0},
>  	{"i915_energy_uJ", i915_energy_uJ, 0},
>  	{"i915_pc8_status", i915_pc8_status, 0},
>  	{"i915_power_domain_info", i915_power_domain_info, 0},
>  	{"i915_display_info", i915_display_info, 0},
>  	{"i915_semaphore_status", i915_semaphore_status, 0},
>  	{"i915_shared_dplls_info", i915_shared_dplls_info, 0},
> +	{"i915_workaround_info", i915_workaround_info, 0},
>  };
>  #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
>  
>  static const struct i915_debugfs_files {
>  	const char *name;
>  	const struct file_operations *fops;
>  } i915_debugfs_files[] = {
>  	{"i915_wedged", &i915_wedged_fops},
>  	{"i915_max_freq", &i915_max_freq_fops},
>  	{"i915_min_freq", &i915_min_freq_fops},
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 3f88f29..9597f95 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -5393,101 +5393,42 @@ static void gen8_init_clock_gating(struct drm_device *dev)
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	enum pipe pipe;
>  
>  	I915_WRITE(WM3_LP_ILK, 0);
>  	I915_WRITE(WM2_LP_ILK, 0);
>  	I915_WRITE(WM1_LP_ILK, 0);
>  
>  	/* FIXME(BDW): Check all the w/a, some might only apply to
>  	 * pre-production hw. */
>  
> -	/* WaDisablePartialInstShootdown:bdw */
> -	I915_WRITE(GEN8_ROW_CHICKEN,
> -		   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
> -
> -	/* WaDisableThreadStallDopClockGating:bdw */
> -	/* FIXME: Unclear whether we really need this on production bdw. */
> -	I915_WRITE(GEN8_ROW_CHICKEN,
> -		   _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
> -
> -	/*
> -	 * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
> -	 * pre-production hardware
> -	 */
> -	I915_WRITE(HALF_SLICE_CHICKEN3,
> -		   _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS));
> -	I915_WRITE(HALF_SLICE_CHICKEN3,
> -		   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
> -	I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE));
> -
> -	I915_WRITE(_3D_CHICKEN3,
> -		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2)));
> -
> -	I915_WRITE(COMMON_SLICE_CHICKEN2,
> -		   _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
> -
> -	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
> -		   _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
> -
> -	/* WaDisableDopClockGating:bdw May not be needed for production */
> -	I915_WRITE(GEN7_ROW_CHICKEN2,
> -		   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
> -
> -	/* WaSwitchSolVfFArbitrationPriority:bdw */
> -	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
>  
>  	/* WaPsrDPAMaskVBlankInSRD:bdw */
>  	I915_WRITE(CHICKEN_PAR1_1,
>  		   I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
>  
>  	/* WaPsrDPRSUnmaskVBlankInSRD:bdw */
>  	for_each_pipe(pipe) {
>  		I915_WRITE(CHICKEN_PIPESL_1(pipe),
>  			   I915_READ(CHICKEN_PIPESL_1(pipe)) |
>  			   BDW_DPRS_MASK_VBLANK_SRD);
>  	}
>  
> -	/* Use Force Non-Coherent whenever executing a 3D context. This is a
> -	 * workaround for for a possible hang in the unlikely event a TLB
> -	 * invalidation occurs during a PSD flush.
> -	 */
> -	I915_WRITE(HDC_CHICKEN0,
> -		   I915_READ(HDC_CHICKEN0) |
> -		   _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
> -
> -	/* WaVSRefCountFullforceMissDisable:bdw */
> -	/* WaDSRefCountFullforceMissDisable:bdw */
> -	I915_WRITE(GEN7_FF_THREAD_MODE,
> -		   I915_READ(GEN7_FF_THREAD_MODE) &
> -		   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
> -
>  	/*
>  	 * BSpec recommends 8x4 when MSAA is used,
>  	 * however in practice 16x4 seems fastest.
>  	 *
>  	 * Note that PS/WM thread counts depend on the WIZ hashing
>  	 * disable bit, which we don't touch here, but it's good
>  	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
>  	 */
>  	I915_WRITE(GEN7_GT_MODE,
>  		   GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);

GT_MODE doesn't get clobbered by a reset? I would have expected it to
behave the same way as CACHE_MODE_{0,1).

> -
> -	I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
> -		   _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
> -
> -	/* WaDisableSDEUnitClockGating:bdw */
> -	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
> -		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

And I would have expected the UCGCTL registers to not be clobbered. IIRC
they didn't get clobbered on my IVB, but my memory might be wrong here.

> -
> -	/* Wa4x4STCOptimizationDisable:bdw */
> -	I915_WRITE(CACHE_MODE_1,
> -		   _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
>  }
>  
>  static void haswell_init_clock_gating(struct drm_device *dev)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  
>  	ilk_init_lp_watermarks(dev);
>  
>  	/* L3 caching of data atomics doesn't work -- disable it. */
>  	I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index b3d8f76..6818e34 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -591,20 +591,85 @@ init_pipe_control(struct intel_engine_cs *ring)
>  	return 0;
>  
>  err_unpin:
>  	i915_gem_object_ggtt_unpin(ring->scratch.obj);
>  err_unref:
>  	drm_gem_object_unreference(&ring->scratch.obj->base);
>  err:
>  	return ret;
>  }
>  
> +static void bdw_init_workarounds(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	/* WaDisablePartialInstShootdown:bdw */
> +	I915_WRITE(GEN8_ROW_CHICKEN,
> +		   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
> +
> +	/* WaDisableThreadStallDopClockGating:bdw */
> +	/* FIXME: Unclear whether we really need these on production bdw. */
> +	I915_WRITE(GEN8_ROW_CHICKEN,
> +		   _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
> +
> +	/*
> +	 * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
> +	 * pre-production hardware
> +	 */
> +	I915_WRITE(HALF_SLICE_CHICKEN3,
> +		   _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS));
> +	I915_WRITE(HALF_SLICE_CHICKEN3,
> +		   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
> +	I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE));
> +
> +	I915_WRITE(_3D_CHICKEN3,
> +		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2)));
> +
> +	I915_WRITE(COMMON_SLICE_CHICKEN2,
> +		   _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
> +
> +	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
> +		   _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
> +
> +	/* WaDisableDopClockGating:bdw May not be needed for production */
> +	I915_WRITE(GEN7_ROW_CHICKEN2,
> +		   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
> +
> +	/* WaSwitchSolVfFArbitrationPriority:bdw */
> +	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
> +
> +	/* Use Force Non-Coherent whenever executing a 3D context. This is a
> +	 * workaround for for a possible hang in the unlikely event a TLB
> +	 * invalidation occurs during a PSD flush.
> +	 */
> +	I915_WRITE(HDC_CHICKEN0,
> +		   I915_READ(HDC_CHICKEN0) |

You might kill this read since it's a masked register. Maybe as a
separate prep-patch.

> +		   _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
> +
> +	/* WaVSRefCountFullforceMissDisable:bdw */
> +	/* WaDSRefCountFullforceMissDisable:bdw */
> +	I915_WRITE(GEN7_FF_THREAD_MODE,
> +		   I915_READ(GEN7_FF_THREAD_MODE) &
> +		   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
> +
> +	/* WaDisableSDEUnitClockGating:bdw */
> +	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
> +		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
> +
> +	I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
> +		   _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
> +
> +	/* Wa4x4STCOptimizationDisable:bdw */
> +	I915_WRITE(CACHE_MODE_1,
> +		   _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));

I think it would be nice to reorder this function a bit to keep similar registers
close to each other. The current mismash makes it easy to overlook a
workaround and then we may end up with duplicates (wouldn't be the first
time that happened).

> +}
> +
>  static int init_render_ring(struct intel_engine_cs *ring)
>  {
>  	struct drm_device *dev = ring->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	int ret = init_ring_common(ring);
>  	if (ret)
>  		return ret;
>  
>  	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
>  	if (INTEL_INFO(dev)->gen >= 4 && INTEL_INFO(dev)->gen < 7)
> @@ -646,20 +711,23 @@ static int init_render_ring(struct intel_engine_cs *ring)
>  		I915_WRITE(CACHE_MODE_0,
>  			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
>  	}
>  
>  	if (INTEL_INFO(dev)->gen >= 6)
>  		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
>  
>  	if (HAS_L3_DPF(dev))
>  		I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
>  
> +	if (IS_BROADWELL(dev))
> +		bdw_init_workarounds(dev);
> +
>  	return ret;
>  }
>  
>  static void render_ring_cleanup(struct intel_engine_cs *ring)
>  {
>  	struct drm_device *dev = ring->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  
>  	if (dev_priv->semaphore_obj) {
>  		i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
> -- 
> 1.9.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC

  reply	other threads:[~2014-07-28 17:01 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-07-28 16:31 [RFC] Move BDW workarounds to ring init fn arun.siluvery
2014-07-28 16:31 ` [RFC] drm/i915/bdw: Initialize BDW workarounds in render " arun.siluvery
2014-07-28 17:00   ` Ville Syrjälä [this message]
2014-07-28 19:22     ` Daniel Vetter
2014-07-28 20:41       ` Siluvery, Arun
2014-07-28 20:46     ` Siluvery, Arun
2014-07-29  9:56       ` Ville Syrjälä
2014-07-28 17:26 ` [RFC] Move BDW workarounds to " Ville Syrjälä
2014-07-29 22:27   ` Siluvery, Arun
2014-07-30  9:36     ` Ville Syrjälä

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140728170039.GN27580@intel.com \
    --to=ville.syrjala@linux.intel.com \
    --cc=arun.siluvery@linux.intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.