* [PATCH] drm/i915/bdw: Initialize workarounds in render ring init fn.
@ 2014-08-14 16:51 arun.siluvery
2014-08-15 23:26 ` Michael H. Nguyen
0 siblings, 1 reply; 3+ messages in thread
From: arun.siluvery @ 2014-08-14 16:51 UTC (permalink / raw)
To: intel-gfx
From: Arun Siluvery <arun.siluvery@linux.intel.com>
The workarounds at the moment are initialized in init_clock_gating() but
they are lost during reset, suspend/resume; this patch moves workarounds
that are part of register state context to render ring init fn otherwise
default context ends up with incorrect values as they don't get initialized
until init_clock_gating fn. This should be ok as they are not related to
display clock gating in the first place.
v2: Add macro to generate w/a table (Daniel)
Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
---
drivers/gpu/drm/i915/i915_debugfs.c | 38 +++++++++++++++++++
drivers/gpu/drm/i915/i915_drv.h | 32 ++++++++++++++++
drivers/gpu/drm/i915/intel_pm.c | 50 -------------------------
drivers/gpu/drm/i915/intel_ringbuffer.c | 66 +++++++++++++++++++++++++++++++++
4 files changed, 136 insertions(+), 50 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 9e737b7..fc28835 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2406,6 +2406,43 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused)
return 0;
}
+static int intel_wa_registers(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = (struct drm_info_node *) m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ int i;
+ int ret;
+
+ if (!dev_priv->num_wa) {
+ DRM_DEBUG_DRIVER("Workaround table not available\n");
+ return -EINVAL;
+ }
+
+ if (dev_priv->num_wa > I915_MAX_WA_REGS)
+ dev_priv->num_wa = I915_MAX_WA_REGS;
+
+ ret = mutex_lock_interruptible(&dev->struct_mutex);
+ if (ret)
+ return ret;
+
+ intel_runtime_pm_get(dev_priv);
+
+ seq_printf(m, "Workarounds applied: %d\n", dev_priv->num_wa);
+ for (i = 0; i < dev_priv->num_wa; ++i) {
+ if (dev_priv->wa_regs[i].addr)
+ seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
+ dev_priv->wa_regs[i].addr,
+ I915_READ(dev_priv->wa_regs[i].addr),
+ dev_priv->wa_regs[i].mask);
+ }
+
+ intel_runtime_pm_put(dev_priv);
+ mutex_unlock(&dev->struct_mutex);
+
+ return 0;
+}
+
struct pipe_crc_info {
const char *name;
struct drm_device *dev;
@@ -3936,6 +3973,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
{"i915_semaphore_status", i915_semaphore_status, 0},
{"i915_shared_dplls_info", i915_shared_dplls_info, 0},
{"i915_dp_mst_info", i915_dp_mst_info, 0},
+ {"intel_wa_registers", intel_wa_registers, 0},
};
#define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 18c9ad8..c4190a9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1222,6 +1222,24 @@ struct i915_gpu_error {
unsigned int test_irq_rings;
};
+/*
+ * workarounds are currently applied at different places and changes
+ * are being done to consolidate them at a single place hence the
+ * exact count is not clear at this point; replace this with accurate
+ * number based on gen later.
+ */
+#define I915_MAX_WA_REGS 16
+
+/*
+ * structure to verify workarounds status
+ * mask: represent w/a bits
+ */
+struct intel_wa {
+ u32 addr;
+ u32 val;
+ u32 mask;
+};
+
enum modeset_restore {
MODESET_ON_LID_OPEN,
MODESET_DONE,
@@ -1422,6 +1440,9 @@ struct drm_i915_private {
struct drm_i915_gem_object *semaphore_obj;
uint32_t last_seqno, next_seqno;
+ uint32_t num_wa;
+ struct intel_wa wa_regs[I915_MAX_WA_REGS];
+
drm_dma_handle_t *status_page_dmah;
struct resource mch_res;
@@ -2803,6 +2824,17 @@ int vlv_freq_opcode(struct drm_i915_private *dev_priv, int val);
#define POSTING_READ(reg) (void)I915_READ_NOTRACE(reg)
#define POSTING_READ16(reg) (void)I915_READ16_NOTRACE(reg)
+#define I915_WRITE_WA(reg, val) ({ \
+ if (dev_priv->num_wa >= 0 \
+ && dev_priv->num_wa < I915_MAX_WA_REGS) { \
+ dev_priv->wa_regs[dev_priv->num_wa].addr = reg; \
+ dev_priv->wa_regs[dev_priv->num_wa].mask \
+ = (val) & 0xFFFF; \
+ dev_priv->num_wa++; \
+ I915_WRITE(reg, val); \
+ } \
+ })
+
/* "Broadcast RGB" property */
#define INTEL_BROADCAST_RGB_AUTO 0
#define INTEL_BROADCAST_RGB_FULL 1
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 1ddd4df..ab64b64 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -5402,38 +5402,11 @@ static void gen8_init_clock_gating(struct drm_device *dev)
/* FIXME(BDW): Check all the w/a, some might only apply to
* pre-production hw. */
- /* WaDisablePartialInstShootdown:bdw */
- I915_WRITE(GEN8_ROW_CHICKEN,
- _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
-
- /* WaDisableThreadStallDopClockGating:bdw */
- /* FIXME: Unclear whether we really need this on production bdw. */
- I915_WRITE(GEN8_ROW_CHICKEN,
- _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
-
- /*
- * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
- * pre-production hardware
- */
- I915_WRITE(HALF_SLICE_CHICKEN3,
- _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS));
- I915_WRITE(HALF_SLICE_CHICKEN3,
- _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE));
I915_WRITE(_3D_CHICKEN3,
_MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2)));
- I915_WRITE(COMMON_SLICE_CHICKEN2,
- _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
-
- I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
- _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
-
- /* WaDisableDopClockGating:bdw May not be needed for production */
- I915_WRITE(GEN7_ROW_CHICKEN2,
- _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
-
/* WaSwitchSolVfFArbitrationPriority:bdw */
I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
@@ -5448,41 +5421,18 @@ static void gen8_init_clock_gating(struct drm_device *dev)
BDW_DPRS_MASK_VBLANK_SRD);
}
- /* Use Force Non-Coherent whenever executing a 3D context. This is a
- * workaround for for a possible hang in the unlikely event a TLB
- * invalidation occurs during a PSD flush.
- */
- I915_WRITE(HDC_CHICKEN0,
- I915_READ(HDC_CHICKEN0) |
- _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
-
/* WaVSRefCountFullforceMissDisable:bdw */
/* WaDSRefCountFullforceMissDisable:bdw */
I915_WRITE(GEN7_FF_THREAD_MODE,
I915_READ(GEN7_FF_THREAD_MODE) &
~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
- /*
- * BSpec recommends 8x4 when MSAA is used,
- * however in practice 16x4 seems fastest.
- *
- * Note that PS/WM thread counts depend on the WIZ hashing
- * disable bit, which we don't touch here, but it's good
- * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
- */
- I915_WRITE(GEN7_GT_MODE,
- GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
-
I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
_MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
/* WaDisableSDEUnitClockGating:bdw */
I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
-
- /* Wa4x4STCOptimizationDisable:bdw */
- I915_WRITE(CACHE_MODE_1,
- _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
}
static void haswell_init_clock_gating(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b3d8f76..abec00c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -598,6 +598,69 @@ err:
return ret;
}
+static void bdw_init_workarounds(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+
+ /*
+ * workarounds applied in this fn are part of register state context,
+ * they need to be re-initialized followed by gpu reset, suspend/resume,
+ * module reload.
+ */
+ dev_priv->num_wa = 0;
+ memset(dev_priv->wa_regs, 0, sizeof(dev_priv->wa_regs));
+
+ /* WaDisablePartialInstShootdown:bdw */
+ /* WaDisableThreadStallDopClockGating:bdw */
+ /* FIXME: Unclear whether we really need this on production bdw. */
+
+ I915_WRITE_WA(GEN8_ROW_CHICKEN,
+ _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
+ | STALL_DOP_GATING_DISABLE));
+
+ /* WaDisableDopClockGating:bdw May not be needed for production */
+ I915_WRITE_WA(GEN7_ROW_CHICKEN2,
+ _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+
+ /*
+ * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
+ * pre-production hardware
+ */
+ I915_WRITE_WA(HALF_SLICE_CHICKEN3,
+ _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS
+ | GEN8_SAMPLER_POWER_BYPASS_DIS));
+
+ I915_WRITE_WA(GEN7_HALF_SLICE_CHICKEN1,
+ _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
+
+ /* Wa4x4STCOptimizationDisable:bdw */
+ I915_WRITE_WA(CACHE_MODE_1,
+ _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
+
+ /*
+ * BSpec recommends 8x4 when MSAA is used,
+ * however in practice 16x4 seems fastest.
+ *
+ * Note that PS/WM thread counts depend on the WIZ hashing
+ * disable bit, which we don't touch here, but it's good
+ * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+ */
+ I915_WRITE_WA(GEN7_GT_MODE,
+ GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
+
+ I915_WRITE_WA(COMMON_SLICE_CHICKEN2,
+ _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
+
+ /* Use Force Non-Coherent whenever executing a 3D context. This is a
+ * workaround for for a possible hang in the unlikely event a TLB
+ * invalidation occurs during a PSD flush.
+ */
+ I915_WRITE_WA(HDC_CHICKEN0,
+ _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
+
+ DRM_DEBUG_DRIVER("No. of workarounds applied: %d\n", dev_priv->num_wa);
+}
+
static int init_render_ring(struct intel_engine_cs *ring)
{
struct drm_device *dev = ring->dev;
@@ -653,6 +716,9 @@ static int init_render_ring(struct intel_engine_cs *ring)
if (HAS_L3_DPF(dev))
I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
+ if (IS_BROADWELL(dev))
+ bdw_init_workarounds(dev);
+
return ret;
}
--
2.0.4
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] drm/i915/bdw: Initialize workarounds in render ring init fn.
2014-08-14 16:51 [PATCH] drm/i915/bdw: Initialize workarounds in render ring init fn arun.siluvery
@ 2014-08-15 23:26 ` Michael H. Nguyen
2014-08-25 21:39 ` Daniel Vetter
0 siblings, 1 reply; 3+ messages in thread
From: Michael H. Nguyen @ 2014-08-15 23:26 UTC (permalink / raw)
To: arun.siluvery, intel-gfx
On 08/14/2014 09:51 AM, arun.siluvery@linux.intel.com wrote:
> From: Arun Siluvery <arun.siluvery@linux.intel.com>
>
> The workarounds at the moment are initialized in init_clock_gating() but
> they are lost during reset, suspend/resume; this patch moves workarounds
> that are part of register state context to render ring init fn otherwise
> default context ends up with incorrect values as they don't get initialized
> until init_clock_gating fn. This should be ok as they are not related to
> display clock gating in the first place.
>
> v2: Add macro to generate w/a table (Daniel)
>
> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 38 +++++++++++++++++++
> drivers/gpu/drm/i915/i915_drv.h | 32 ++++++++++++++++
> drivers/gpu/drm/i915/intel_pm.c | 50 -------------------------
> drivers/gpu/drm/i915/intel_ringbuffer.c | 66 +++++++++++++++++++++++++++++++++
> 4 files changed, 136 insertions(+), 50 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 9e737b7..fc28835 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2406,6 +2406,43 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused)
> return 0;
> }
>
> +static int intel_wa_registers(struct seq_file *m, void *unused)
> +{
> + struct drm_info_node *node = (struct drm_info_node *) m->private;
> + struct drm_device *dev = node->minor->dev;
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + int i;
> + int ret;
> +
> + if (!dev_priv->num_wa) {
> + DRM_DEBUG_DRIVER("Workaround table not available\n");
> + return -EINVAL;
> + }
> +
> + if (dev_priv->num_wa > I915_MAX_WA_REGS)
> + dev_priv->num_wa = I915_MAX_WA_REGS;
> +
> + ret = mutex_lock_interruptible(&dev->struct_mutex);
> + if (ret)
> + return ret;
> +
> + intel_runtime_pm_get(dev_priv);
> +
> + seq_printf(m, "Workarounds applied: %d\n", dev_priv->num_wa);
> + for (i = 0; i < dev_priv->num_wa; ++i) {
> + if (dev_priv->wa_regs[i].addr)
> + seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
> + dev_priv->wa_regs[i].addr,
> + I915_READ(dev_priv->wa_regs[i].addr),
> + dev_priv->wa_regs[i].mask);
> + }
> +
> + intel_runtime_pm_put(dev_priv);
> + mutex_unlock(&dev->struct_mutex);
> +
> + return 0;
> +}
> +
> struct pipe_crc_info {
> const char *name;
> struct drm_device *dev;
> @@ -3936,6 +3973,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
> {"i915_semaphore_status", i915_semaphore_status, 0},
> {"i915_shared_dplls_info", i915_shared_dplls_info, 0},
> {"i915_dp_mst_info", i915_dp_mst_info, 0},
> + {"intel_wa_registers", intel_wa_registers, 0},
> };
> #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 18c9ad8..c4190a9 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1222,6 +1222,24 @@ struct i915_gpu_error {
> unsigned int test_irq_rings;
> };
>
> +/*
> + * workarounds are currently applied at different places and changes
> + * are being done to consolidate them at a single place hence the
> + * exact count is not clear at this point; replace this with accurate
> + * number based on gen later.
> + */
> +#define I915_MAX_WA_REGS 16
> +
> +/*
> + * structure to verify workarounds status
> + * mask: represent w/a bits
> + */
> +struct intel_wa {
> + u32 addr;
> + u32 val;
> + u32 mask;
> +};
Can we rename this to intel_reg_wa or similar? intel_wa confused me for
a second. Personally when I think about wa's, they can be simple
register writes like is the case here or more complex requiring some
software logic.
Also, should this be defined in a userspace accessible header? I see
this defined again locally in the IGT test.
> +
> enum modeset_restore {
> MODESET_ON_LID_OPEN,
> MODESET_DONE,
> @@ -1422,6 +1440,9 @@ struct drm_i915_private {
> struct drm_i915_gem_object *semaphore_obj;
> uint32_t last_seqno, next_seqno;
>
> + uint32_t num_wa;
> + struct intel_wa wa_regs[I915_MAX_WA_REGS];
> +
> drm_dma_handle_t *status_page_dmah;
> struct resource mch_res;
>
> @@ -2803,6 +2824,17 @@ int vlv_freq_opcode(struct drm_i915_private *dev_priv, int val);
> #define POSTING_READ(reg) (void)I915_READ_NOTRACE(reg)
> #define POSTING_READ16(reg) (void)I915_READ16_NOTRACE(reg)
>
> +#define I915_WRITE_WA(reg, val) ({ \
> + if (dev_priv->num_wa >= 0 \
> + && dev_priv->num_wa < I915_MAX_WA_REGS) { \
> + dev_priv->wa_regs[dev_priv->num_wa].addr = reg; \
> + dev_priv->wa_regs[dev_priv->num_wa].mask \
> + = (val) & 0xFFFF; \
> + dev_priv->num_wa++; \
> + I915_WRITE(reg, val); \
> + } \
If this macro is called too many times, there will be silent failures I
believe. Should we add an else case that does a WARN_ON or prints a
message? This will help developers catch bugs.
Thanks,
Mike
> + })
> +
> /* "Broadcast RGB" property */
> #define INTEL_BROADCAST_RGB_AUTO 0
> #define INTEL_BROADCAST_RGB_FULL 1
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 1ddd4df..ab64b64 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -5402,38 +5402,11 @@ static void gen8_init_clock_gating(struct drm_device *dev)
> /* FIXME(BDW): Check all the w/a, some might only apply to
> * pre-production hw. */
>
> - /* WaDisablePartialInstShootdown:bdw */
> - I915_WRITE(GEN8_ROW_CHICKEN,
> - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
> -
> - /* WaDisableThreadStallDopClockGating:bdw */
> - /* FIXME: Unclear whether we really need this on production bdw. */
> - I915_WRITE(GEN8_ROW_CHICKEN,
> - _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
> -
> - /*
> - * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
> - * pre-production hardware
> - */
> - I915_WRITE(HALF_SLICE_CHICKEN3,
> - _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS));
> - I915_WRITE(HALF_SLICE_CHICKEN3,
> - _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
> I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE));
>
> I915_WRITE(_3D_CHICKEN3,
> _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2)));
>
> - I915_WRITE(COMMON_SLICE_CHICKEN2,
> - _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
> -
> - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
> - _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
> -
> - /* WaDisableDopClockGating:bdw May not be needed for production */
> - I915_WRITE(GEN7_ROW_CHICKEN2,
> - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
> -
> /* WaSwitchSolVfFArbitrationPriority:bdw */
> I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
>
> @@ -5448,41 +5421,18 @@ static void gen8_init_clock_gating(struct drm_device *dev)
> BDW_DPRS_MASK_VBLANK_SRD);
> }
>
> - /* Use Force Non-Coherent whenever executing a 3D context. This is a
> - * workaround for for a possible hang in the unlikely event a TLB
> - * invalidation occurs during a PSD flush.
> - */
> - I915_WRITE(HDC_CHICKEN0,
> - I915_READ(HDC_CHICKEN0) |
> - _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
> -
> /* WaVSRefCountFullforceMissDisable:bdw */
> /* WaDSRefCountFullforceMissDisable:bdw */
> I915_WRITE(GEN7_FF_THREAD_MODE,
> I915_READ(GEN7_FF_THREAD_MODE) &
> ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
>
> - /*
> - * BSpec recommends 8x4 when MSAA is used,
> - * however in practice 16x4 seems fastest.
> - *
> - * Note that PS/WM thread counts depend on the WIZ hashing
> - * disable bit, which we don't touch here, but it's good
> - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
> - */
> - I915_WRITE(GEN7_GT_MODE,
> - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
> -
> I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
> _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
>
> /* WaDisableSDEUnitClockGating:bdw */
> I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
> GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
> -
> - /* Wa4x4STCOptimizationDisable:bdw */
> - I915_WRITE(CACHE_MODE_1,
> - _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
> }
>
> static void haswell_init_clock_gating(struct drm_device *dev)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index b3d8f76..abec00c 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -598,6 +598,69 @@ err:
> return ret;
> }
>
> +static void bdw_init_workarounds(struct drm_device *dev)
> +{
> + struct drm_i915_private *dev_priv = dev->dev_private;
> +
> + /*
> + * workarounds applied in this fn are part of register state context,
> + * they need to be re-initialized followed by gpu reset, suspend/resume,
> + * module reload.
> + */
> + dev_priv->num_wa = 0;
> + memset(dev_priv->wa_regs, 0, sizeof(dev_priv->wa_regs));
> +
> + /* WaDisablePartialInstShootdown:bdw */
> + /* WaDisableThreadStallDopClockGating:bdw */
> + /* FIXME: Unclear whether we really need this on production bdw. */
> +
> + I915_WRITE_WA(GEN8_ROW_CHICKEN,
> + _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
> + | STALL_DOP_GATING_DISABLE));
> +
> + /* WaDisableDopClockGating:bdw May not be needed for production */
> + I915_WRITE_WA(GEN7_ROW_CHICKEN2,
> + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
> +
> + /*
> + * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
> + * pre-production hardware
> + */
> + I915_WRITE_WA(HALF_SLICE_CHICKEN3,
> + _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS
> + | GEN8_SAMPLER_POWER_BYPASS_DIS));
> +
> + I915_WRITE_WA(GEN7_HALF_SLICE_CHICKEN1,
> + _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
> +
> + /* Wa4x4STCOptimizationDisable:bdw */
> + I915_WRITE_WA(CACHE_MODE_1,
> + _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
> +
> + /*
> + * BSpec recommends 8x4 when MSAA is used,
> + * however in practice 16x4 seems fastest.
> + *
> + * Note that PS/WM thread counts depend on the WIZ hashing
> + * disable bit, which we don't touch here, but it's good
> + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
> + */
> + I915_WRITE_WA(GEN7_GT_MODE,
> + GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
> +
> + I915_WRITE_WA(COMMON_SLICE_CHICKEN2,
> + _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
> +
> + /* Use Force Non-Coherent whenever executing a 3D context. This is a
> + * workaround for for a possible hang in the unlikely event a TLB
> + * invalidation occurs during a PSD flush.
> + */
> + I915_WRITE_WA(HDC_CHICKEN0,
> + _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
> +
> + DRM_DEBUG_DRIVER("No. of workarounds applied: %d\n", dev_priv->num_wa);
> +}
> +
> static int init_render_ring(struct intel_engine_cs *ring)
> {
> struct drm_device *dev = ring->dev;
> @@ -653,6 +716,9 @@ static int init_render_ring(struct intel_engine_cs *ring)
> if (HAS_L3_DPF(dev))
> I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
>
> + if (IS_BROADWELL(dev))
> + bdw_init_workarounds(dev);
> +
> return ret;
> }
>
>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] drm/i915/bdw: Initialize workarounds in render ring init fn.
2014-08-15 23:26 ` Michael H. Nguyen
@ 2014-08-25 21:39 ` Daniel Vetter
0 siblings, 0 replies; 3+ messages in thread
From: Daniel Vetter @ 2014-08-25 21:39 UTC (permalink / raw)
To: Michael H. Nguyen; +Cc: intel-gfx
On Fri, Aug 15, 2014 at 04:26:26PM -0700, Michael H. Nguyen wrote:
> Also, should this be defined in a userspace accessible header? I see this
> defined again locally in the IGT test.
It's a debugfs interface, i.e. no real ABI guarantees. And so better not
to expose this in any of the uapi header files. We've done pretty much
the same duplication for all other debugfs files used in igt tests.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2014-08-25 21:38 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-08-14 16:51 [PATCH] drm/i915/bdw: Initialize workarounds in render ring init fn arun.siluvery
2014-08-15 23:26 ` Michael H. Nguyen
2014-08-25 21:39 ` Daniel Vetter
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox