* [PATCH 1/6] drm/i915: Reinitialize default context after reset
2014-09-18 14:58 [PATCH 0/6] RFC: Persistent workarounds across reset/resume Mika Kuoppala
@ 2014-09-18 14:58 ` Mika Kuoppala
2014-09-18 15:36 ` Chris Wilson
2014-09-19 17:46 ` Volkin, Bradley D
2014-09-18 14:58 ` [PATCH 2/6] drm/i915: Reinitialize default context after resume Mika Kuoppala
` (4 subsequent siblings)
5 siblings, 2 replies; 17+ messages in thread
From: Mika Kuoppala @ 2014-09-18 14:58 UTC (permalink / raw)
To: intel-gfx
We don't know in what shape the default context was before reset.
The reset also dropped our changes that were done in
ring->init_context.
Mark our default context as uninitialized for it to be properly
set up on reset recovery.
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
drivers/gpu/drm/i915/i915_debugfs.c | 2 +-
drivers/gpu/drm/i915/i915_drv.h | 4 ++--
drivers/gpu/drm/i915/i915_gem_context.c | 42 +++++++++++++++++++++------------
drivers/gpu/drm/i915/intel_lrc.c | 4 ++--
4 files changed, 32 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 0ba5c71..a03361c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -176,7 +176,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
static void describe_ctx(struct seq_file *m, struct intel_context *ctx)
{
- seq_putc(m, ctx->legacy_hw_ctx.initialized ? 'I' : 'i');
+ seq_putc(m, ctx->initialized ? 'I' : 'i');
seq_putc(m, ctx->remap_slice ? 'R' : 'r');
seq_putc(m, ' ');
}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 07dafa2..49b45ec 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -637,16 +637,16 @@ struct intel_context {
/* Legacy ring buffer submission */
struct {
struct drm_i915_gem_object *rcs_state;
- bool initialized;
} legacy_hw_ctx;
/* Execlists */
- bool rcs_initialized;
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
} engine[I915_NUM_RINGS];
+ bool initialized;
+
struct list_head link;
};
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index a5221d8..b479840 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -306,6 +306,8 @@ void i915_gem_context_reset(struct drm_device *dev)
i915_gem_context_unreference(lctx);
ring->last_context = NULL;
}
+
+ ring->default_context->initialized = false;
}
}
@@ -515,13 +517,30 @@ mi_set_context(struct intel_engine_cs *ring,
return ret;
}
+static void context_state_init(struct intel_engine_cs *ring,
+ struct intel_context *to)
+{
+ int ret;
+
+ if (ring->init_context) {
+ ret = ring->init_context(ring);
+ if (ret)
+ DRM_ERROR("ring init context: %d\n", ret);
+ }
+
+ if (ring->id == RCS) {
+ ret = i915_gem_render_state_init(ring);
+ if (ret)
+ DRM_ERROR("init render state: %d\n", ret);
+ }
+}
+
static int do_switch(struct intel_engine_cs *ring,
struct intel_context *to)
{
struct drm_i915_private *dev_priv = ring->dev->dev_private;
struct intel_context *from = ring->last_context;
u32 hw_flags = 0;
- bool uninitialized = false;
int ret, i;
if (from != NULL && ring == &dev_priv->ring[RCS]) {
@@ -577,7 +596,7 @@ static int do_switch(struct intel_engine_cs *ring,
vma->bind_vma(vma, to->legacy_hw_ctx.rcs_state->cache_level, GLOBAL_BIND);
}
- if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to))
+ if (!to->initialized || i915_gem_context_is_default(to))
hw_flags |= MI_RESTORE_INHIBIT;
ret = mi_set_context(ring, to, hw_flags);
@@ -618,26 +637,19 @@ static int do_switch(struct intel_engine_cs *ring,
/* obj is kept alive until the next request by its active ref */
i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
i915_gem_context_unreference(from);
- }
- uninitialized = !to->legacy_hw_ctx.initialized && from == NULL;
- to->legacy_hw_ctx.initialized = true;
+ /* We inherit the state from the previous context */
+ to->initialized = true;
+ }
done:
i915_gem_context_reference(to);
ring->last_context = to;
- if (uninitialized) {
- if (ring->init_context) {
- ret = ring->init_context(ring);
- if (ret)
- DRM_ERROR("ring init context: %d\n", ret);
- }
+ if (!to->initialized)
+ context_state_init(ring, to);
- ret = i915_gem_render_state_init(ring);
- if (ret)
- DRM_ERROR("init render state: %d\n", ret);
- }
+ to->initialized = true;
return 0;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 803fc38..4899a3c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1744,7 +1744,7 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
ring->status_page.obj = ctx_obj;
}
- if (ring->id == RCS && !ctx->rcs_initialized) {
+ if (ring->id == RCS && !ctx->initialized) {
ret = intel_lr_context_render_state_init(ring, ctx);
if (ret) {
DRM_ERROR("Init render state failed: %d\n", ret);
@@ -1753,7 +1753,7 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
intel_destroy_ringbuffer_obj(ringbuf);
goto error;
}
- ctx->rcs_initialized = true;
+ ctx->initialized = true;
}
return 0;
--
1.9.1
^ permalink raw reply related [flat|nested] 17+ messages in thread* Re: [PATCH 1/6] drm/i915: Reinitialize default context after reset
2014-09-18 14:58 ` [PATCH 1/6] drm/i915: Reinitialize default context after reset Mika Kuoppala
@ 2014-09-18 15:36 ` Chris Wilson
2014-09-19 15:43 ` Daniel Vetter
2014-09-19 17:46 ` Volkin, Bradley D
1 sibling, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2014-09-18 15:36 UTC (permalink / raw)
To: Mika Kuoppala; +Cc: intel-gfx
On Thu, Sep 18, 2014 at 05:58:30PM +0300, Mika Kuoppala wrote:
> We don't know in what shape the default context was before reset.
> The reset also dropped our changes that were done in
> ring->init_context.
>
> Mark our default context as uninitialized for it to be properly
> setup up on reset recovery .
The fundamental problem here is that context_init is not correctly split
into init/init_hw (or init/enable) pairs. That is we simply do not
attempt to reset the default state after reset via i915_gem_init_hw(). I
would rather we brought rationality to the gem init/init_hw sequence
first.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 1/6] drm/i915: Reinitialize default context after reset
2014-09-18 15:36 ` Chris Wilson
@ 2014-09-19 15:43 ` Daniel Vetter
0 siblings, 0 replies; 17+ messages in thread
From: Daniel Vetter @ 2014-09-19 15:43 UTC (permalink / raw)
To: Chris Wilson, Mika Kuoppala, intel-gfx
On Thu, Sep 18, 2014 at 04:36:47PM +0100, Chris Wilson wrote:
> On Thu, Sep 18, 2014 at 05:58:30PM +0300, Mika Kuoppala wrote:
> > We don't know in what shape the default context was before reset.
> > The reset also dropped our changes that were done in
> > ring->init_context.
> >
> > Mark our default context as uninitialized for it to be properly
> > setup up on reset recovery .
>
> The fundamental problem here is that context_init is not correctly split
> into init/init_hw (or init/enable) pairs. That is we simply do not
> attempt to reset the default state after reset via i915_gem_init_hw(). I
> would rather we brought rationality to the gem init/init_hw sequence
> first.
Yeah I agree with Chris that context setup doesn't follow our established
split with _init doing the software setup (and doesn't touch the hw at
all) and _init_hw doing the hw setup. _init is run at driver load, while
_init_hw is run at driver load, resume, after gpu reset and maybe even
from runtime pm code.
But I don't mind if we do this after or before this series, whatever is
less fuzz is fine with me.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 1/6] drm/i915: Reinitialize default context after reset
2014-09-18 14:58 ` [PATCH 1/6] drm/i915: Reinitialize default context after reset Mika Kuoppala
2014-09-18 15:36 ` Chris Wilson
@ 2014-09-19 17:46 ` Volkin, Bradley D
2014-10-07 14:54 ` Mika Kuoppala
1 sibling, 1 reply; 17+ messages in thread
From: Volkin, Bradley D @ 2014-09-19 17:46 UTC (permalink / raw)
To: Mika Kuoppala; +Cc: intel-gfx@lists.freedesktop.org
[snip]
On Thu, Sep 18, 2014 at 07:58:30AM -0700, Mika Kuoppala wrote:
> @@ -577,7 +596,7 @@ static int do_switch(struct intel_engine_cs *ring,
> vma->bind_vma(vma, to->legacy_hw_ctx.rcs_state->cache_level, GLOBAL_BIND);
> }
>
> - if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to))
> + if (!to->initialized || i915_gem_context_is_default(to))
> hw_flags |= MI_RESTORE_INHIBIT;
>
> ret = mi_set_context(ring, to, hw_flags);
> @@ -618,26 +637,19 @@ static int do_switch(struct intel_engine_cs *ring,
> /* obj is kept alive until the next request by its active ref */
> i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
> i915_gem_context_unreference(from);
> - }
>
> - uninitialized = !to->legacy_hw_ctx.initialized && from == NULL;
> - to->legacy_hw_ctx.initialized = true;
> + /* We inherit the state from the previous context */
> + to->initialized = true;
> + }
Regarding these two hunks, I may have asked this question before but if
so I've forgotten the answer. Why do we want to set MI_RESTORE_INHIBIT
when switching to the default context? Why do we want to inherit state
from the previous context?
I assumed that when switching to an initialized context we would always
want to restore its last state.
Thanks,
Brad
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 1/6] drm/i915: Reinitialize default context after reset
2014-09-19 17:46 ` Volkin, Bradley D
@ 2014-10-07 14:54 ` Mika Kuoppala
0 siblings, 0 replies; 17+ messages in thread
From: Mika Kuoppala @ 2014-10-07 14:54 UTC (permalink / raw)
To: Volkin, Bradley D; +Cc: intel-gfx@lists.freedesktop.org
"Volkin, Bradley D" <bradley.d.volkin@intel.com> writes:
Hi Bradley,
> [snip]
>
> On Thu, Sep 18, 2014 at 07:58:30AM -0700, Mika Kuoppala wrote:
>> @@ -577,7 +596,7 @@ static int do_switch(struct intel_engine_cs *ring,
>> vma->bind_vma(vma, to->legacy_hw_ctx.rcs_state->cache_level, GLOBAL_BIND);
>> }
>>
>> - if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to))
>> + if (!to->initialized || i915_gem_context_is_default(to))
>> hw_flags |= MI_RESTORE_INHIBIT;
>>
>> ret = mi_set_context(ring, to, hw_flags);
>> @@ -618,26 +637,19 @@ static int do_switch(struct intel_engine_cs *ring,
>> /* obj is kept alive until the next request by its active ref */
>> i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
>> i915_gem_context_unreference(from);
>> - }
>>
>> - uninitialized = !to->legacy_hw_ctx.initialized && from == NULL;
>> - to->legacy_hw_ctx.initialized = true;
>> + /* We inherit the state from the previous context */
>> + to->initialized = true;
>> + }
>
> Regarding these two hunks, I may have asked this question before but if
> so I've forgotten the answer. Why do we want to set MI_RESTORE_INHIBIT
> when switching to the default context? Why do we want to inherit state
> from the previous context?
>
> I assumed that when switching to an initialized context we would always
> want to restore its last state.
>
I would have been pondering the same questions. Perhaps it has something
to do with the fact that default context should not assume anything
about its previous state and thus userspace needs to initialize
everything fully. And also to avoid the context switch performance
hit on restoring. Ben might know answer.
My plan is to experiment with making master copy of the first fully
initialized state and use it as a read-only bo to initialize all
subsequent contexts from. Atleast with the code, we could measure
the perf impact.
-Mika
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 2/6] drm/i915: Reinitialize default context after resume
2014-09-18 14:58 [PATCH 0/6] RFC: Persistent workarounds across reset/resume Mika Kuoppala
2014-09-18 14:58 ` [PATCH 1/6] drm/i915: Reinitialize default context after reset Mika Kuoppala
@ 2014-09-18 14:58 ` Mika Kuoppala
2014-09-18 14:58 ` [PATCH 3/6] drm/i915: Disable render idle on user forcewake Mika Kuoppala
` (3 subsequent siblings)
5 siblings, 0 replies; 17+ messages in thread
From: Mika Kuoppala @ 2014-09-18 14:58 UTC (permalink / raw)
To: intel-gfx
We have lost our ring and default context state on suspend,
including workarounds. Setup default context state like we
do in module load.
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
drivers/gpu/drm/i915/i915_drv.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 8ce1b13..c86ef2c 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -586,6 +586,7 @@ static int i915_drm_freeze(struct drm_device *dev)
}
i915_gem_suspend_gtt_mappings(dev);
+ i915_gem_context_reset(dev);
i915_save_state(dev);
--
1.9.1
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH 3/6] drm/i915: Disable render idle on user forcewake
2014-09-18 14:58 [PATCH 0/6] RFC: Persistent workarounds across reset/resume Mika Kuoppala
2014-09-18 14:58 ` [PATCH 1/6] drm/i915: Reinitialize default context after reset Mika Kuoppala
2014-09-18 14:58 ` [PATCH 2/6] drm/i915: Reinitialize default context after resume Mika Kuoppala
@ 2014-09-18 14:58 ` Mika Kuoppala
2014-09-18 15:30 ` Chris Wilson
2014-09-18 14:58 ` [PATCH 4/6] drm/i915: Build workaround list in ring initialization Mika Kuoppala
` (2 subsequent siblings)
5 siblings, 1 reply; 17+ messages in thread
From: Mika Kuoppala @ 2014-09-18 14:58 UTC (permalink / raw)
To: intel-gfx
Render context on gen8 is not guaranteed to be loaded (active)
at the instant when forcewake ack shows up. So we need extra
steps to get it in before we access specific registers.
We could do register white listing to do the extra dance only on
specific render context access. However the main concern is
ring initialization after reset/resume, so only take the extra
steps on user forcewake as it is already guarding ring init. And not incur
extra perf penalty to regular register accesses. This allows us to be
sure that we don't read all zeros on RMW cycles. And we get to show a
consistent state to igt when user fw has been acquired.
Based on earlier work by Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
drivers/gpu/drm/i915/i915_debugfs.c | 4 ++++
drivers/gpu/drm/i915/i915_reg.h | 4 ++++
drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++++
drivers/gpu/drm/i915/intel_uncore.c | 37 ++++++++++++++++++++++++++++++---
4 files changed, 46 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a03361c..89b740b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2655,6 +2655,8 @@ static int i915_wa_registers(struct seq_file *m, void *unused)
intel_runtime_pm_get(dev_priv);
+ gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
+
seq_printf(m, "Workarounds applied: %d\n", dev_priv->num_wa_regs);
for (i = 0; i < dev_priv->num_wa_regs; ++i) {
u32 addr, mask;
@@ -2669,6 +2671,8 @@ static int i915_wa_registers(struct seq_file *m, void *unused)
dev_priv->intel_wa_regs[i].mask);
}
+ gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
+
intel_runtime_pm_put(dev_priv);
mutex_unlock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index ad8179b..b98138d 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1401,9 +1401,13 @@ enum punit_power_well {
#define GEN6_BLITTER_FBC_NOTIFY (1<<3)
#define GEN6_RC_SLEEP_PSMI_CONTROL 0x2050
+#define GEN6_RC_SLEEP_MSG_DISABLE (1 << 0)
#define GEN8_RC_SEMA_IDLE_MSG_DISABLE (1 << 12)
#define GEN8_FF_DOP_CLOCK_GATE_DISABLE (1<<10)
+#define GEN6_CSPWRFSM 0x22ac
+#define GEN6_CSPWRFSM_CS_NONIDLE (0x3 << 4)
+
#define GEN6_BSD_SLEEP_PSMI_CONTROL 0x12050
#define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0)
#define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 922d6bc..46cd0f9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -813,6 +813,8 @@ static int init_render_ring(struct intel_engine_cs *ring)
if (ret)
return ret;
+ gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
+
/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
if (INTEL_INFO(dev)->gen >= 4 && INTEL_INFO(dev)->gen < 7)
I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
@@ -860,6 +862,8 @@ static int init_render_ring(struct intel_engine_cs *ring)
if (HAS_L3_DPF(dev))
I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
+ gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
+
return ret;
}
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 918b761..08587bd 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -299,6 +299,30 @@ static void vlv_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine)
spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
}
+static void __render_wake_get(struct drm_i915_private *dev_priv)
+{
+ if (INTEL_INFO(dev_priv->dev)->gen != 8)
+ return;
+
+ __raw_i915_write32(dev_priv, GEN6_RC_SLEEP_PSMI_CONTROL,
+ _MASKED_BIT_ENABLE(GEN6_RC_SLEEP_MSG_DISABLE));
+
+ if (wait_for_atomic_us((__raw_i915_read32(dev_priv, GEN6_CSPWRFSM)
+ & GEN6_CSPWRFSM_CS_NONIDLE)
+ == GEN6_CSPWRFSM_CS_NONIDLE,
+ 500))
+ DRM_ERROR("Render wake get timed out\n");
+}
+
+static void __render_wake_put(struct drm_i915_private *dev_priv)
+{
+ if (INTEL_INFO(dev_priv->dev)->gen != 8)
+ return;
+
+ __raw_i915_write32(dev_priv, GEN6_RC_SLEEP_PSMI_CONTROL,
+ _MASKED_BIT_DISABLE(GEN6_RC_SLEEP_MSG_DISABLE));
+}
+
static void gen6_force_wake_timer(unsigned long arg)
{
struct drm_i915_private *dev_priv = (void *)arg;
@@ -309,8 +333,11 @@ static void gen6_force_wake_timer(unsigned long arg)
spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
WARN_ON(!dev_priv->uncore.forcewake_count);
- if (--dev_priv->uncore.forcewake_count == 0)
+ if (--dev_priv->uncore.forcewake_count == 0) {
+ __render_wake_put(dev_priv);
dev_priv->uncore.funcs.force_wake_put(dev_priv, FORCEWAKE_ALL);
+ }
+
spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
intel_runtime_pm_put(dev_priv);
@@ -351,8 +378,10 @@ void intel_uncore_forcewake_reset(struct drm_device *dev, bool restore)
fw = FORCEWAKE_ALL;
}
- if (fw)
+ if (fw) {
dev_priv->uncore.funcs.force_wake_get(dev_priv, fw);
+ __render_wake_get(dev_priv);
+ }
if (IS_GEN6(dev) || IS_GEN7(dev))
dev_priv->uncore.fifo_count =
@@ -415,8 +444,10 @@ void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv, int fw_engine)
return vlv_force_wake_get(dev_priv, fw_engine);
spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
- if (dev_priv->uncore.forcewake_count++ == 0)
+ if (dev_priv->uncore.forcewake_count++ == 0) {
dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_ALL);
+ __render_wake_get(dev_priv);
+ }
spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
}
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 17+ messages in thread* Re: [PATCH 3/6] drm/i915: Disable render idle on user forcewake
2014-09-18 14:58 ` [PATCH 3/6] drm/i915: Disable render idle on user forcewake Mika Kuoppala
@ 2014-09-18 15:30 ` Chris Wilson
2014-09-18 15:54 ` Mika Kuoppala
0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2014-09-18 15:30 UTC (permalink / raw)
To: Mika Kuoppala; +Cc: intel-gfx
On Thu, Sep 18, 2014 at 05:58:32PM +0300, Mika Kuoppala wrote:
> Render context on gen8 is not guaranteed to be loaded (active)
> at the instant when forcewake ack shows up. So we need extra
> steps to get it in before we access specific registers.
>
> We could do register white listing to do the extra dance only on
> specific render context access. However the main concern is
> is ring initialization after reset/resume, so only take the extra
> steps on user forcewake as it is already guarding ring init. And not incur
> extra perf penalty to regular register accesses. This allows us to be
> sure that we don't read all zeros on RMW cycles. And we to show a
> consistent state to igt when user fw has been acquired.
Might I just ask why? This talks about the need to serialise with the
LRI use to load the context registers, so be explicit. When reading
those registers, we should know what is going on with the GPU or just
don't care. Writing to those registers once the ring is running is
verboten.
This is just very thin papering over an unexplained problem.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 3/6] drm/i915: Disable render idle on user forcewake
2014-09-18 15:30 ` Chris Wilson
@ 2014-09-18 15:54 ` Mika Kuoppala
2014-09-18 16:03 ` Chris Wilson
2014-09-18 17:05 ` Ville Syrjälä
0 siblings, 2 replies; 17+ messages in thread
From: Mika Kuoppala @ 2014-09-18 15:54 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
Chris Wilson <chris@chris-wilson.co.uk> writes:
> On Thu, Sep 18, 2014 at 05:58:32PM +0300, Mika Kuoppala wrote:
>> Render context on gen8 is not guaranteed to be loaded (active)
>> at the instant when forcewake ack shows up. So we need extra
>> steps to get it in before we access specific registers.
>>
>> We could do register white listing to do the extra dance only on
>> specific render context access. However the main concern is
>> is ring initialization after reset/resume, so only take the extra
>> steps on user forcewake as it is already guarding ring init. And not incur
>> extra perf penalty to regular register accesses. This allows us to be
>> sure that we don't read all zeros on RMW cycles. And we to show a
>> consistent state to igt when user fw has been acquired.
>
> Might I just ask why? This talks about the need to serialise with the
> LRI use to load the context registers, so be explicit. When reading
> those registers, we should know what is going on with the GPU or just
> don't care. Writing to those registers once the ring is running is
> verboten.
>
> This is just very thin papering over an unexplained problem.
It is not clear why but on fw ack showing up, we have a short
delay before some registers on render contexts start to return
other than all zeroes. Usually you can get 2-3 reads before
the values show up. This manifests by sometimes the dfs/
i915_ppgtt_info will return 'render ring:PDP0' as zeroes or
top 32 bits zeroes, as it is the first read(s).
Also as our wa verification tests reads through mmio, after
getting user forcewake, we see sometimes read returning zeros.
If we do workaround verification by STORE_REGISTER_MEMs on
igt side and accept that debugfs/i915_forcewake_user doesn't
guarantee anything with regards to access to render context,
then I think that we can drop this patch.
And be very extra careful with workarounds that need RMW
on this area.
-Mika
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 3/6] drm/i915: Disable render idle on user forcewake
2014-09-18 15:54 ` Mika Kuoppala
@ 2014-09-18 16:03 ` Chris Wilson
2014-09-18 17:05 ` Ville Syrjälä
1 sibling, 0 replies; 17+ messages in thread
From: Chris Wilson @ 2014-09-18 16:03 UTC (permalink / raw)
To: Mika Kuoppala; +Cc: intel-gfx
On Thu, Sep 18, 2014 at 06:54:15PM +0300, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
>
> > On Thu, Sep 18, 2014 at 05:58:32PM +0300, Mika Kuoppala wrote:
> >> Render context on gen8 is not guaranteed to be loaded (active)
> >> at the instant when forcewake ack shows up. So we need extra
> >> steps to get it in before we access specific registers.
> >>
> >> We could do register white listing to do the extra dance only on
> >> specific render context access. However the main concern is
> >> is ring initialization after reset/resume, so only take the extra
> >> steps on user forcewake as it is already guarding ring init. And not incur
> >> extra perf penalty to regular register accesses. This allows us to be
> >> sure that we don't read all zeros on RMW cycles. And we to show a
> >> consistent state to igt when user fw has been acquired.
> >
> > Might I just ask why? This talks about the need to serialise with the
> > LRI use to load the context registers, so be explicit. When reading
> > those registers, we should know what is going on with the GPU or just
> > don't care. Writing to those registers once the ring is running is
> > verboten.
> >
> > This is just very thin papering over an unexplained problem.
>
> It is not clear why but on fw ack showing up, we have a short
> delay before some registers on render contexts start to return
> other than all zeroes. Usually you can get 2-3 reads before
> the values show up. This manifests by sometimes the dfs/
> i915_ppgtt_info will return 'render ring:PDP0' as zeroes or
> top 32bits zeroes, ss it is the first read(s).
>
> Also as our wa verification tests reads through mmio, after
> getting user forcewake, we see sometimes read returning zeros.
>
> If we do workaround verification by STORE_REGISTER_MEMs on
> igt side and accept that debugfs/i915_forcewake_user doesn't
> guarantee anything with regards to access to render context,
> then I think that we can drop this patch.
>
> And be very extra careful with workarounds that need RWM
> on this area.
I think you want to push the burden of work into the igt here. It first
must flush the GPU and ensure that is idle (which is done for us by the
framework anyway), then busy spin waiting for the hardware to quiesce
before checking the w/a in that case.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 3/6] drm/i915: Disable render idle on user forcewake
2014-09-18 15:54 ` Mika Kuoppala
2014-09-18 16:03 ` Chris Wilson
@ 2014-09-18 17:05 ` Ville Syrjälä
1 sibling, 0 replies; 17+ messages in thread
From: Ville Syrjälä @ 2014-09-18 17:05 UTC (permalink / raw)
To: Mika Kuoppala; +Cc: intel-gfx
On Thu, Sep 18, 2014 at 06:54:15PM +0300, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
>
> > On Thu, Sep 18, 2014 at 05:58:32PM +0300, Mika Kuoppala wrote:
> >> Render context on gen8 is not guaranteed to be loaded (active)
> >> at the instant when forcewake ack shows up. So we need extra
> >> steps to get it in before we access specific registers.
> >>
> >> We could do register white listing to do the extra dance only on
> >> specific render context access. However the main concern is
> >> is ring initialization after reset/resume, so only take the extra
> >> steps on user forcewake as it is already guarding ring init. And not incur
> >> extra perf penalty to regular register accesses. This allows us to be
> >> sure that we don't read all zeros on RMW cycles. And we to show a
> >> consistent state to igt when user fw has been acquired.
> >
> > Might I just ask why? This talks about the need to serialise with the
> > LRI use to load the context registers, so be explicit. When reading
> > those registers, we should know what is going on with the GPU or just
> > don't care. Writing to those registers once the ring is running is
> > verboten.
> >
> > This is just very thin papering over an unexplained problem.
>
> It is not clear why but on fw ack showing up, we have a short
> delay before some registers on render contexts start to return
> other than all zeroes. Usually you can get 2-3 reads before
> the values show up. This manifests by sometimes the dfs/
> i915_ppgtt_info will return 'render ring:PDP0' as zeroes or
> top 32bits zeroes, ss it is the first read(s).
>
> Also as our wa verification tests reads through mmio, after
> getting user forcewake, we see sometimes read returning zeros.
>
> If we do workaround verification by STORE_REGISTER_MEMs on
> igt side and accept that debugfs/i915_forcewake_user doesn't
> guarantee anything with regards to access to render context,
> then I think that we can drop this patch.
>
> And be very extra careful with workarounds that need RWM
> on this area.
While the spec is quite unclear, I've come up with this theory
on the HW operation:
1. forcewak req=1
2. load power context (includes CCID)
3. forcewake ack=1
4. load render context based on CCID
So if you manage to read the context saved registers via mmio
before step 4 you get the power on defaults instead of the
values from the context.
And on GPU reset we lose CCID so we also lose the workarounds
because following the reset we switch to the default context
with restore_inbhibit=1. Once/if we switch to an already initialized
proper context, the workarounds should magically reappear and should
subsequently be saved to the default context(s) even. So afterwards
going at least once into rc6 should bring the workarounds back even
for a default context. And similarly I suppose we lose the power
context+CCID on suspend/resume so we also lose the workarounds there.
Would be somewhat interesting to see if these theories hold up to
reality.
--
Ville Syrjälä
Intel OTC
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 4/6] drm/i915: Build workaround list in ring initialization
2014-09-18 14:58 [PATCH 0/6] RFC: Persistent workarounds across reset/resume Mika Kuoppala
` (2 preceding siblings ...)
2014-09-18 14:58 ` [PATCH 3/6] drm/i915: Disable render idle on user forcewake Mika Kuoppala
@ 2014-09-18 14:58 ` Mika Kuoppala
2014-09-19 15:49 ` Daniel Vetter
2014-09-18 14:58 ` [PATCH 5/6] drm/i915: Add ivb workarounds into the wa list Mika Kuoppala
2014-09-18 14:58 ` [PATCH 6/6] drm/i915: Check workaround status on dfs read time Mika Kuoppala
5 siblings, 1 reply; 17+ messages in thread
From: Mika Kuoppala @ 2014-09-18 14:58 UTC (permalink / raw)
To: intel-gfx
to disassociate workaround list init from the actual writing
of values. This is needed as not all workarounds will be masked bit
enables and we want full control on when the read part of RMW
will happen.
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
drivers/gpu/drm/i915/i915_debugfs.c | 18 +--
drivers/gpu/drm/i915/i915_drv.h | 28 ++---
drivers/gpu/drm/i915/intel_ringbuffer.c | 188 ++++++++++++++++++--------------
3 files changed, 129 insertions(+), 105 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 89b740b..c35c6ce 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2657,18 +2657,18 @@ static int i915_wa_registers(struct seq_file *m, void *unused)
gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
- seq_printf(m, "Workarounds applied: %d\n", dev_priv->num_wa_regs);
- for (i = 0; i < dev_priv->num_wa_regs; ++i) {
+ seq_printf(m, "Workarounds applied: %d\n", dev_priv->workarounds.count);
+ for (i = 0; i < dev_priv->workarounds.count; ++i) {
u32 addr, mask;
- addr = dev_priv->intel_wa_regs[i].addr;
- mask = dev_priv->intel_wa_regs[i].mask;
- dev_priv->intel_wa_regs[i].value = I915_READ(addr) | mask;
- if (dev_priv->intel_wa_regs[i].addr)
+ addr = dev_priv->workarounds.reg[i].addr;
+ mask = dev_priv->workarounds.reg[i].mask;
+ dev_priv->workarounds.reg[i].value = I915_READ(addr) | mask;
+ if (dev_priv->workarounds.reg[i].addr)
seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
- dev_priv->intel_wa_regs[i].addr,
- dev_priv->intel_wa_regs[i].value,
- dev_priv->intel_wa_regs[i].mask);
+ dev_priv->workarounds.reg[i].addr,
+ dev_priv->workarounds.reg[i].value,
+ dev_priv->workarounds.reg[i].mask);
}
gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 49b45ec..3087d5a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1445,6 +1445,20 @@ struct i915_frontbuffer_tracking {
unsigned flip_bits;
};
+struct i915_wa_reg {
+ u32 addr;
+ u32 value;
+ /* bitmask representing WA bits */
+ u32 mask;
+};
+
+#define I915_MAX_WA_REGS 16
+
+struct i915_workarounds {
+ struct i915_wa_reg reg[I915_MAX_WA_REGS];
+ u32 count;
+};
+
struct drm_i915_private {
struct drm_device *dev;
struct kmem_cache *slab;
@@ -1587,19 +1601,7 @@ struct drm_i915_private {
struct intel_shared_dpll shared_dplls[I915_NUM_PLLS];
int dpio_phy_iosf_port[I915_NUM_PHYS_VLV];
- /*
- * workarounds are currently applied at different places and
- * changes are being done to consolidate them so exact count is
- * not clear at this point, use a max value for now.
- */
-#define I915_MAX_WA_REGS 16
- struct {
- u32 addr;
- u32 value;
- /* bitmask representing WA bits */
- u32 mask;
- } intel_wa_regs[I915_MAX_WA_REGS];
- u32 num_wa_regs;
+ struct i915_workarounds workarounds;
/* Reclocking support */
bool render_reclock_avail;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 46cd0f9..4f336e23 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -665,87 +665,113 @@ err:
return ret;
}
-static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
- u32 addr, u32 value)
+static int intel_ring_workarounds_emit(struct intel_engine_cs *ring)
{
+ int ret, i;
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
+ struct i915_workarounds *w = &dev_priv->workarounds;
- if (WARN_ON(dev_priv->num_wa_regs >= I915_MAX_WA_REGS))
- return;
+ if (WARN_ON(w->count == 0))
+ return 0;
- intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
- intel_ring_emit(ring, addr);
- intel_ring_emit(ring, value);
+ ring->gpu_caches_dirty = true;
+ ret = intel_ring_flush_all_caches(ring);
+ if (ret)
+ return ret;
- dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr;
- dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF;
- /* value is updated with the status of remaining bits of this
- * register when it is read from debugfs file
- */
- dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
- dev_priv->num_wa_regs++;
+ ret = intel_ring_begin(ring, w->count * 3);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < w->count; i++) {
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit(ring, w->reg[i].addr);
+ intel_ring_emit(ring, w->reg[i].value);
+ }
+
+ intel_ring_advance(ring);
+
+ ring->gpu_caches_dirty = true;
+ ret = intel_ring_flush_all_caches(ring);
+ if (ret)
+ return ret;
+
+ DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);
- return;
+ return 0;
+}
+
+static int wa_add(struct drm_i915_private *dev_priv,
+ const u32 addr, const u32 val, const u32 mask)
+{
+ const u32 idx = dev_priv->workarounds.count;
+
+ if (WARN_ON(idx >= I915_MAX_WA_REGS))
+ return -ENOSPC;
+
+ dev_priv->workarounds.reg[idx].addr = addr;
+ dev_priv->workarounds.reg[idx].value = val;
+ dev_priv->workarounds.reg[idx].mask = mask;
+
+ dev_priv->workarounds.count++;
+
+ return 0;
}
+#define WA_REG(addr, val, mask) { \
+ const int r = wa_add(dev_priv, (addr), (val), (mask)); \
+ if (r) return r; }
+
+#define WA_SET_BIT_MASKED(addr, mask) WA_REG(addr, \
+ _MASKED_BIT_ENABLE(mask), (mask) & 0xffff)
+
+#define WA_CLR_BIT_MASKED(addr, mask) WA_REG(addr, \
+ _MASKED_BIT_DISABLE(mask), (mask) & 0xffff)
+
+#define WA_SET_BIT(addr, mask) WA_REG(addr, I915_READ(addr) | (mask), mask)
+#define WA_CLR_BIT(addr, mask) WA_REG(addr, I915_READ(addr) & ~(mask), mask)
+
+#define WA_WRITE(addr, val) WA_REG(addr, val, 0xffffffff)
+
static int bdw_init_workarounds(struct intel_engine_cs *ring)
{
- int ret;
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
- /*
- * workarounds applied in this fn are part of register state context,
- * they need to be re-initialized followed by gpu reset, suspend/resume,
- * module reload.
- */
- dev_priv->num_wa_regs = 0;
- memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
-
- /*
- * update the number of dwords required based on the
- * actual number of workarounds applied
- */
- ret = intel_ring_begin(ring, 24);
- if (ret)
- return ret;
-
/* WaDisablePartialInstShootdown:bdw */
/* WaDisableThreadStallDopClockGating:bdw */
- /* FIXME: Unclear whether we really need this on production bdw. */
- intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
- _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
- | STALL_DOP_GATING_DISABLE));
+ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+ PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE |
+ STALL_DOP_GATING_DISABLE);
/* WaDisableDopClockGating:bdw May not be needed for production */
- intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
- _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+ WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
+ DOP_CLOCK_GATING_DISABLE);
/*
* This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
* pre-production hardware
*/
- intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
- _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS
- | GEN8_SAMPLER_POWER_BYPASS_DIS));
+ WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+ GEN8_CENTROID_PIXEL_OPT_DIS | GEN8_SAMPLER_POWER_BYPASS_DIS);
- intel_ring_emit_wa(ring, GEN7_HALF_SLICE_CHICKEN1,
- _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
+ WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
+ GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE);
- intel_ring_emit_wa(ring, COMMON_SLICE_CHICKEN2,
- _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
+ WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+ GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
/* Use Force Non-Coherent whenever executing a 3D context. This is a
* workaround for for a possible hang in the unlikely event a TLB
* invalidation occurs during a PSD flush.
*/
- intel_ring_emit_wa(ring, HDC_CHICKEN0,
- _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
+ WA_SET_BIT_MASKED(HDC_CHICKEN0,
+ HDC_FORCE_NON_COHERENT);
/* Wa4x4STCOptimizationDisable:bdw */
- intel_ring_emit_wa(ring, CACHE_MODE_1,
- _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
+ WA_SET_BIT_MASKED(CACHE_MODE_1,
+ GEN8_4x4_STC_OPTIMIZATION_DISABLE);
/*
* BSpec recommends 8x4 when MSAA is used,
@@ -755,52 +781,50 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
* disable bit, which we don't touch here, but it's good
* to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
*/
- intel_ring_emit_wa(ring, GEN7_GT_MODE,
- GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
-
- intel_ring_advance(ring);
-
- DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n",
- dev_priv->num_wa_regs);
+ WA_SET_BIT_MASKED(GEN7_GT_MODE,
+ GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
return 0;
}
static int chv_init_workarounds(struct intel_engine_cs *ring)
{
- int ret;
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
- /*
- * workarounds applied in this fn are part of register state context,
- * they need to be re-initialized followed by gpu reset, suspend/resume,
- * module reload.
- */
- dev_priv->num_wa_regs = 0;
- memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
-
- ret = intel_ring_begin(ring, 12);
- if (ret)
- return ret;
-
/* WaDisablePartialInstShootdown:chv */
- intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
- _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
+ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+ PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
/* WaDisableThreadStallDopClockGating:chv */
- intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
- _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
+ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+ STALL_DOP_GATING_DISABLE);
/* WaDisableDopClockGating:chv (pre-production hw) */
- intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
- _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+ WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
+ DOP_CLOCK_GATING_DISABLE);
/* WaDisableSamplerPowerBypass:chv (pre-production hw) */
- intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
- _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
+ WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+ GEN8_SAMPLER_POWER_BYPASS_DIS);
- intel_ring_advance(ring);
+ return 0;
+}
+
+static int init_workarounds_ring(struct intel_engine_cs *ring)
+{
+ struct drm_device *dev = ring->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+
+ WARN_ON(ring->id != RCS);
+
+ dev_priv->workarounds.count = 0;
+
+ if (IS_BROADWELL(dev))
+ return bdw_init_workarounds(ring);
+
+ if (IS_CHERRYVIEW(dev))
+ return chv_init_workarounds(ring);
return 0;
}
@@ -864,7 +888,7 @@ static int init_render_ring(struct intel_engine_cs *ring)
gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
- return ret;
+ return init_workarounds_ring(ring);
}
static void render_ring_cleanup(struct intel_engine_cs *ring)
@@ -2305,10 +2329,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
dev_priv->semaphore_obj = obj;
}
}
- if (IS_CHERRYVIEW(dev))
- ring->init_context = chv_init_workarounds;
- else
- ring->init_context = bdw_init_workarounds;
+
+ ring->init_context = intel_ring_workarounds_emit;
ring->add_request = gen6_add_request;
ring->flush = gen8_render_ring_flush;
ring->irq_get = gen8_ring_get_irq;
--
1.9.1
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH 4/6] drm/i915: Build workaround list in ring initialization
2014-09-18 14:58 ` [PATCH 4/6] drm/i915: Build workaround list in ring initialization Mika Kuoppala
@ 2014-09-19 15:49 ` Daniel Vetter
0 siblings, 0 replies; 17+ messages in thread
From: Daniel Vetter @ 2014-09-19 15:49 UTC (permalink / raw)
To: Mika Kuoppala; +Cc: intel-gfx
On Thu, Sep 18, 2014 at 05:58:33PM +0300, Mika Kuoppala wrote:
> to disassociate workaround list init from the actual writing
> of values. This is needed as not workarounds will be masked bit
> enables and we want full control on when the read part of RMW
> will happen.
>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
The piece imo still missing here is adding all the other (non-render
context) wa registers to the wa list. It looks like wa_add should be able
to cope, but I prefer we check that by e.g. reworking all gen7+
clock_gating wa to use this.
This would also mean that we need to make these functions non-static.
Might as well go nuts and extract most of the w/a functionality into a new
intel_wa.c with a bit of DOC: overview sections and the important
functions (intel_wa_add) having proper kerneldoc. Of course we can do this
as a follow up once things settle a bit.
Otherwise I think this is going in the right direction.
-Daniel
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 18 +--
> drivers/gpu/drm/i915/i915_drv.h | 28 ++---
> drivers/gpu/drm/i915/intel_ringbuffer.c | 188 ++++++++++++++++++--------------
> 3 files changed, 129 insertions(+), 105 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 89b740b..c35c6ce 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2657,18 +2657,18 @@ static int i915_wa_registers(struct seq_file *m, void *unused)
>
> gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
>
> - seq_printf(m, "Workarounds applied: %d\n", dev_priv->num_wa_regs);
> - for (i = 0; i < dev_priv->num_wa_regs; ++i) {
> + seq_printf(m, "Workarounds applied: %d\n", dev_priv->workarounds.count);
> + for (i = 0; i < dev_priv->workarounds.count; ++i) {
> u32 addr, mask;
>
> - addr = dev_priv->intel_wa_regs[i].addr;
> - mask = dev_priv->intel_wa_regs[i].mask;
> - dev_priv->intel_wa_regs[i].value = I915_READ(addr) | mask;
> - if (dev_priv->intel_wa_regs[i].addr)
> + addr = dev_priv->workarounds.reg[i].addr;
> + mask = dev_priv->workarounds.reg[i].mask;
> + dev_priv->workarounds.reg[i].value = I915_READ(addr) | mask;
> + if (dev_priv->workarounds.reg[i].addr)
> seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
> - dev_priv->intel_wa_regs[i].addr,
> - dev_priv->intel_wa_regs[i].value,
> - dev_priv->intel_wa_regs[i].mask);
> + dev_priv->workarounds.reg[i].addr,
> + dev_priv->workarounds.reg[i].value,
> + dev_priv->workarounds.reg[i].mask);
> }
>
> gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 49b45ec..3087d5a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1445,6 +1445,20 @@ struct i915_frontbuffer_tracking {
> unsigned flip_bits;
> };
>
> +struct i915_wa_reg {
> + u32 addr;
> + u32 value;
> + /* bitmask representing WA bits */
> + u32 mask;
> +};
> +
> +#define I915_MAX_WA_REGS 16
> +
> +struct i915_workarounds {
> + struct i915_wa_reg reg[I915_MAX_WA_REGS];
> + u32 count;
> +};
> +
> struct drm_i915_private {
> struct drm_device *dev;
> struct kmem_cache *slab;
> @@ -1587,19 +1601,7 @@ struct drm_i915_private {
> struct intel_shared_dpll shared_dplls[I915_NUM_PLLS];
> int dpio_phy_iosf_port[I915_NUM_PHYS_VLV];
>
> - /*
> - * workarounds are currently applied at different places and
> - * changes are being done to consolidate them so exact count is
> - * not clear at this point, use a max value for now.
> - */
> -#define I915_MAX_WA_REGS 16
> - struct {
> - u32 addr;
> - u32 value;
> - /* bitmask representing WA bits */
> - u32 mask;
> - } intel_wa_regs[I915_MAX_WA_REGS];
> - u32 num_wa_regs;
> + struct i915_workarounds workarounds;
>
> /* Reclocking support */
> bool render_reclock_avail;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 46cd0f9..4f336e23 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -665,87 +665,113 @@ err:
> return ret;
> }
>
> -static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
> - u32 addr, u32 value)
> +static int intel_ring_workarounds_emit(struct intel_engine_cs *ring)
> {
> + int ret, i;
> struct drm_device *dev = ring->dev;
> struct drm_i915_private *dev_priv = dev->dev_private;
> + struct i915_workarounds *w = &dev_priv->workarounds;
>
> - if (WARN_ON(dev_priv->num_wa_regs >= I915_MAX_WA_REGS))
> - return;
> + if (WARN_ON(w->count == 0))
> + return 0;
>
> - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> - intel_ring_emit(ring, addr);
> - intel_ring_emit(ring, value);
> + ring->gpu_caches_dirty = true;
> + ret = intel_ring_flush_all_caches(ring);
> + if (ret)
> + return ret;
>
> - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr;
> - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF;
> - /* value is updated with the status of remaining bits of this
> - * register when it is read from debugfs file
> - */
> - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
> - dev_priv->num_wa_regs++;
> + ret = intel_ring_begin(ring, w->count * 3);
> + if (ret)
> + return ret;
> +
> + for (i = 0; i < w->count; i++) {
> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> + intel_ring_emit(ring, w->reg[i].addr);
> + intel_ring_emit(ring, w->reg[i].value);
> + }
> +
> + intel_ring_advance(ring);
> +
> + ring->gpu_caches_dirty = true;
> + ret = intel_ring_flush_all_caches(ring);
> + if (ret)
> + return ret;
> +
> + DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);
>
> - return;
> + return 0;
> +}
> +
> +static int wa_add(struct drm_i915_private *dev_priv,
> + const u32 addr, const u32 val, const u32 mask)
> +{
> + const u32 idx = dev_priv->workarounds.count;
> +
> + if (WARN_ON(idx >= I915_MAX_WA_REGS))
> + return -ENOSPC;
> +
> + dev_priv->workarounds.reg[idx].addr = addr;
> + dev_priv->workarounds.reg[idx].value = val;
> + dev_priv->workarounds.reg[idx].mask = mask;
> +
> + dev_priv->workarounds.count++;
> +
> + return 0;
> }
>
> +#define WA_REG(addr, val, mask) { \
> + const int r = wa_add(dev_priv, (addr), (val), (mask)); \
> + if (r) return r; }
> +
> +#define WA_SET_BIT_MASKED(addr, mask) WA_REG(addr, \
> + _MASKED_BIT_ENABLE(mask), (mask) & 0xffff)
> +
> +#define WA_CLR_BIT_MASKED(addr, mask) WA_REG(addr, \
> + _MASKED_BIT_DISABLE(mask), (mask) & 0xffff)
> +
> +#define WA_SET_BIT(addr, mask) WA_REG(addr, I915_READ(addr) | (mask), mask)
> +#define WA_CLR_BIT(addr, mask) WA_REG(addr, I915_READ(addr) & ~(mask), mask)
> +
> +#define WA_WRITE(addr, val) WA_REG(addr, val, 0xffffffff)
> +
> static int bdw_init_workarounds(struct intel_engine_cs *ring)
> {
> - int ret;
> struct drm_device *dev = ring->dev;
> struct drm_i915_private *dev_priv = dev->dev_private;
>
> - /*
> - * workarounds applied in this fn are part of register state context,
> - * they need to be re-initialized followed by gpu reset, suspend/resume,
> - * module reload.
> - */
> - dev_priv->num_wa_regs = 0;
> - memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
> -
> - /*
> - * update the number of dwords required based on the
> - * actual number of workarounds applied
> - */
> - ret = intel_ring_begin(ring, 24);
> - if (ret)
> - return ret;
> -
> /* WaDisablePartialInstShootdown:bdw */
> /* WaDisableThreadStallDopClockGating:bdw */
> - /* FIXME: Unclear whether we really need this on production bdw. */
> - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
> - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
> - | STALL_DOP_GATING_DISABLE));
> + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
> + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE |
> + STALL_DOP_GATING_DISABLE);
>
> /* WaDisableDopClockGating:bdw May not be needed for production */
> - intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
> - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
> + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
> + DOP_CLOCK_GATING_DISABLE);
>
> /*
> * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
> * pre-production hardware
> */
> - intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
> - _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS
> - | GEN8_SAMPLER_POWER_BYPASS_DIS));
> + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
> + GEN8_CENTROID_PIXEL_OPT_DIS | GEN8_SAMPLER_POWER_BYPASS_DIS);
>
> - intel_ring_emit_wa(ring, GEN7_HALF_SLICE_CHICKEN1,
> - _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
> + WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
> + GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE);
>
> - intel_ring_emit_wa(ring, COMMON_SLICE_CHICKEN2,
> - _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
> + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
> + GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
>
> /* Use Force Non-Coherent whenever executing a 3D context. This is a
> * workaround for for a possible hang in the unlikely event a TLB
> * invalidation occurs during a PSD flush.
> */
> - intel_ring_emit_wa(ring, HDC_CHICKEN0,
> - _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
> + WA_SET_BIT_MASKED(HDC_CHICKEN0,
> + HDC_FORCE_NON_COHERENT);
>
> /* Wa4x4STCOptimizationDisable:bdw */
> - intel_ring_emit_wa(ring, CACHE_MODE_1,
> - _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
> + WA_SET_BIT_MASKED(CACHE_MODE_1,
> + GEN8_4x4_STC_OPTIMIZATION_DISABLE);
>
> /*
> * BSpec recommends 8x4 when MSAA is used,
> @@ -755,52 +781,50 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
> * disable bit, which we don't touch here, but it's good
> * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
> */
> - intel_ring_emit_wa(ring, GEN7_GT_MODE,
> - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
> -
> - intel_ring_advance(ring);
> -
> - DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n",
> - dev_priv->num_wa_regs);
> + WA_SET_BIT_MASKED(GEN7_GT_MODE,
> + GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
>
> return 0;
> }
>
> static int chv_init_workarounds(struct intel_engine_cs *ring)
> {
> - int ret;
> struct drm_device *dev = ring->dev;
> struct drm_i915_private *dev_priv = dev->dev_private;
>
> - /*
> - * workarounds applied in this fn are part of register state context,
> - * they need to be re-initialized followed by gpu reset, suspend/resume,
> - * module reload.
> - */
> - dev_priv->num_wa_regs = 0;
> - memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
> -
> - ret = intel_ring_begin(ring, 12);
> - if (ret)
> - return ret;
> -
> /* WaDisablePartialInstShootdown:chv */
> - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
> - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
> + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
> + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
>
> /* WaDisableThreadStallDopClockGating:chv */
> - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
> - _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
> + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
> + STALL_DOP_GATING_DISABLE);
>
> /* WaDisableDopClockGating:chv (pre-production hw) */
> - intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
> - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
> + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
> + DOP_CLOCK_GATING_DISABLE);
>
> /* WaDisableSamplerPowerBypass:chv (pre-production hw) */
> - intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
> - _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
> + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
> + GEN8_SAMPLER_POWER_BYPASS_DIS);
>
> - intel_ring_advance(ring);
> + return 0;
> +}
> +
> +static int init_workarounds_ring(struct intel_engine_cs *ring)
> +{
> + struct drm_device *dev = ring->dev;
> + struct drm_i915_private *dev_priv = dev->dev_private;
> +
> + WARN_ON(ring->id != RCS);
> +
> + dev_priv->workarounds.count = 0;
> +
> + if (IS_BROADWELL(dev))
> + return bdw_init_workarounds(ring);
> +
> + if (IS_CHERRYVIEW(dev))
> + return chv_init_workarounds(ring);
>
> return 0;
> }
> @@ -864,7 +888,7 @@ static int init_render_ring(struct intel_engine_cs *ring)
>
> gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
>
> - return ret;
> + return init_workarounds_ring(ring);
> }
>
> static void render_ring_cleanup(struct intel_engine_cs *ring)
> @@ -2305,10 +2329,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
> dev_priv->semaphore_obj = obj;
> }
> }
> - if (IS_CHERRYVIEW(dev))
> - ring->init_context = chv_init_workarounds;
> - else
> - ring->init_context = bdw_init_workarounds;
> +
> + ring->init_context = intel_ring_workarounds_emit;
> ring->add_request = gen6_add_request;
> ring->flush = gen8_render_ring_flush;
> ring->irq_get = gen8_ring_get_irq;
> --
> 1.9.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 5/6] drm/i915: Add ivb workarounds into the wa list.
2014-09-18 14:58 [PATCH 0/6] RFC: Persistent workarounds across reset/resume Mika Kuoppala
` (3 preceding siblings ...)
2014-09-18 14:58 ` [PATCH 4/6] drm/i915: Build workaround list in ring initialization Mika Kuoppala
@ 2014-09-18 14:58 ` Mika Kuoppala
2014-09-18 14:58 ` [PATCH 6/6] drm/i915: Check workaround status on dfs read time Mika Kuoppala
5 siblings, 0 replies; 17+ messages in thread
From: Mika Kuoppala @ 2014-09-18 14:58 UTC (permalink / raw)
To: intel-gfx
So that we write them at ring initialization and thus
have them applied correctly after reset/resume.
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
drivers/gpu/drm/i915/i915_reg.h | 2 +-
drivers/gpu/drm/i915/intel_pm.c | 61 +------------------
drivers/gpu/drm/i915/intel_ringbuffer.c | 100 ++++++++++++++++++++++++++++++++
3 files changed, 102 insertions(+), 61 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b98138d..99cd494 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -4818,7 +4818,7 @@ enum punit_power_well {
/* GEN7 chicken */
#define GEN7_COMMON_SLICE_CHICKEN1 0x7010
-# define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC ((1<<10) | (1<<26))
+# define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC (1<<10)
#define COMMON_SLICE_CHICKEN2 0x7014
# define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE (1<<0)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 675e8a2..d8151a7 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -5712,7 +5712,7 @@ static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
/*
- * WaVSThreadDispatchOverride:ivb,vlv
+ * WaVSThreadDispatchOverride:vlv
*
* This actually overrides the dispatch
* mode for all thread types.
@@ -5866,73 +5866,14 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
- /* WaDisableEarlyCull:ivb */
- I915_WRITE(_3D_CHICKEN3,
- _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
-
- /* WaDisableBackToBackFlipFix:ivb */
- I915_WRITE(IVB_CHICKEN3,
- CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
- CHICKEN3_DGMG_DONE_FIX_DISABLE);
-
- /* WaDisablePSDDualDispatchEnable:ivb */
- if (IS_IVB_GT1(dev))
- I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
- _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
-
- /* WaDisable_RenderCache_OperationalFlush:ivb */
- I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
-
- /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
- I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
- GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
-
- /* WaApplyL3ControlAndL3ChickenMode:ivb */
- I915_WRITE(GEN7_L3CNTLREG1,
- GEN7_WA_FOR_GEN7_L3_CONTROL);
- I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
- GEN7_WA_L3_CHICKEN_MODE);
- if (IS_IVB_GT1(dev))
- I915_WRITE(GEN7_ROW_CHICKEN2,
- _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
- else {
- /* must write both registers */
- I915_WRITE(GEN7_ROW_CHICKEN2,
- _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
- I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
- _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
- }
-
- /* WaForceL3Serialization:ivb */
- I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
- ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
-
- /*
- * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
- * This implements the WaDisableRCZUnitClockGating:ivb workaround.
- */
- I915_WRITE(GEN6_UCGCTL2,
- GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
-
- /* This is required by WaCatErrorRejectionIssue:ivb */
- I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
- I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
- GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
-
g4x_disable_trickle_feed(dev);
- gen7_setup_fixed_func_scheduler(dev_priv);
-
if (0) { /* causes HiZ corruption on ivb:gt1 */
/* enable HiZ Raw Stall Optimization */
I915_WRITE(CACHE_MODE_0_GEN7,
_MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
}
- /* WaDisable4x2SubspanOptimization:ivb */
- I915_WRITE(CACHE_MODE_1,
- _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
-
/*
* BSpec recommends 8x4 when MSAA is used,
* however in practice 16x4 seems fastest.
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 4f336e23..abbb9f8 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -702,6 +702,21 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring)
return 0;
}
+static void intel_ring_workarounds_write(struct intel_engine_cs *ring)
+{
+ struct drm_device *dev = ring->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct i915_workarounds *w = &dev_priv->workarounds;
+ int i;
+
+ for (i = 0; i < w->count; i++) {
+ I915_WRITE(dev_priv->workarounds.reg[i].addr,
+ dev_priv->workarounds.reg[i].value);
+ }
+
+ DRM_DEBUG_DRIVER("Number of Workarounds written: %d\n", w->count);
+}
+
static int wa_add(struct drm_i915_private *dev_priv,
const u32 addr, const u32 val, const u32 mask)
{
@@ -734,6 +749,82 @@ static int wa_add(struct drm_i915_private *dev_priv,
#define WA_WRITE(addr, val) WA_REG(addr, val, 0xffffffff)
+static int ivb_init_workarounds(struct intel_engine_cs *ring)
+{
+ struct drm_device *dev = ring->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+
+ /* WaDisableEarlyCull:ivb */
+ WA_SET_BIT_MASKED(_3D_CHICKEN3,
+ _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
+
+ /* WaDisableBackToBackFlipFix:ivb */
+ WA_WRITE(IVB_CHICKEN3,
+ CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
+ CHICKEN3_DGMG_DONE_FIX_DISABLE);
+
+ /* WaDisablePSDDualDispatchEnable:ivb */
+ if (IS_IVB_GT1(dev))
+ WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
+ GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
+
+ /* WaDisable_RenderCache_OperationalFlush:ivb */
+ WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
+
+ /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
+ WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
+ GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
+
+ /* WaApplyL3ControlAndL3ChickenMode:ivb */
+ WA_WRITE(GEN7_L3CNTLREG1,
+ GEN7_WA_FOR_GEN7_L3_CONTROL);
+ WA_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
+ GEN7_WA_L3_CHICKEN_MODE);
+ if (IS_IVB_GT1(dev)) {
+ WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
+ DOP_CLOCK_GATING_DISABLE);
+ } else {
+ /* must write both registers */
+ WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
+ DOP_CLOCK_GATING_DISABLE);
+ WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2_GT2,
+ DOP_CLOCK_GATING_DISABLE);
+ }
+
+ /* WaForceL3Serialization:ivb */
+ WA_CLR_BIT(GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
+
+ /*
+ * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
+ * This implements the WaDisableRCZUnitClockGating:ivb workaround.
+ */
+ WA_WRITE(GEN6_UCGCTL2,
+ GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
+
+ /* This is required by WaCatErrorRejectionIssue:ivb */
+ WA_SET_BIT(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
+ GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
+
+ /*
+ * WaVSThreadDispatchOverride:ivb
+ *
+ * This actually overrides the dispatch
+ * mode for all thread types.
+ */
+
+#define SET_FF_SCHED(v) ((v) & ~GEN7_FF_SCHED_MASK) \
+ | GEN7_FF_TS_SCHED_HW | GEN7_FF_VS_SCHED_HW | GEN7_FF_DS_SCHED_HW
+
+ WA_REG(GEN7_FF_THREAD_MODE,
+ SET_FF_SCHED(I915_READ(GEN7_FF_THREAD_MODE)), GEN7_FF_SCHED_MASK);
+
+ /* WaDisable4x2SubspanOptimization:ivb */
+ WA_SET_BIT_MASKED(CACHE_MODE_1,
+ PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
+
+ return 0;
+}
+
static int bdw_init_workarounds(struct intel_engine_cs *ring)
{
struct drm_device *dev = ring->dev;
@@ -815,11 +906,20 @@ static int init_workarounds_ring(struct intel_engine_cs *ring)
{
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
+ int ret;
WARN_ON(ring->id != RCS);
dev_priv->workarounds.count = 0;
+ if (IS_IVYBRIDGE(dev)) {
+ ret = ivb_init_workarounds(ring);
+ if (ret)
+ return ret;
+
+ intel_ring_workarounds_write(ring);
+ }
+
if (IS_BROADWELL(dev))
return bdw_init_workarounds(ring);
--
1.9.1
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 6/6] drm/i915: Check workaround status on dfs read time
2014-09-18 14:58 [PATCH 0/6] RFC: Persistent workarounds across reset/resume Mika Kuoppala
` (4 preceding siblings ...)
2014-09-18 14:58 ` [PATCH 5/6] drm/i915: Add ivb workarounds into the wa list Mika Kuoppala
@ 2014-09-18 14:58 ` Mika Kuoppala
2014-09-18 15:18 ` [PATCH] tests/gem_workarounds: adapt to constant wa list from driver Mika Kuoppala
5 siblings, 1 reply; 17+ messages in thread
From: Mika Kuoppala @ 2014-09-18 14:58 UTC (permalink / raw)
To: intel-gfx
As the workaround list has the value as initialization time
constant, we can do the simple checking on the go. For the
user that doesn't have igt at hand, and it still serves
igt's needs.
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
drivers/gpu/drm/i915/i915_debugfs.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index c35c6ce..962c41e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2659,16 +2659,16 @@ static int i915_wa_registers(struct seq_file *m, void *unused)
seq_printf(m, "Workarounds applied: %d\n", dev_priv->workarounds.count);
for (i = 0; i < dev_priv->workarounds.count; ++i) {
- u32 addr, mask;
+ u32 addr, mask, value, read;
+ bool ok;
addr = dev_priv->workarounds.reg[i].addr;
mask = dev_priv->workarounds.reg[i].mask;
- dev_priv->workarounds.reg[i].value = I915_READ(addr) | mask;
- if (dev_priv->workarounds.reg[i].addr)
- seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
- dev_priv->workarounds.reg[i].addr,
- dev_priv->workarounds.reg[i].value,
- dev_priv->workarounds.reg[i].mask);
+ value = dev_priv->workarounds.reg[i].value;
+ read = I915_READ(addr);
+ ok = (value & mask) == (read & mask);
+ seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X, read: 0x%08x, status: %s\n",
+ addr, value, mask, read, ok ? "OK" : "FAIL");
}
gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
--
1.9.1
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH] tests/gem_workarounds: adapt to constant wa list from driver
2014-09-18 14:58 ` [PATCH 6/6] drm/i915: Check workaround status on dfs read time Mika Kuoppala
@ 2014-09-18 15:18 ` Mika Kuoppala
0 siblings, 0 replies; 17+ messages in thread
From: Mika Kuoppala @ 2014-09-18 15:18 UTC (permalink / raw)
To: intel-gfx
Read and verify workaround list before and after the
reset/resume.
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
tests/gem_workarounds.c | 123 +++++++++++++++++++++++++-----------------------
1 file changed, 64 insertions(+), 59 deletions(-)
diff --git a/tests/gem_workarounds.c b/tests/gem_workarounds.c
index 32156d2..d2b3959 100644
--- a/tests/gem_workarounds.c
+++ b/tests/gem_workarounds.c
@@ -50,6 +50,7 @@
enum operation {
GPU_RESET = 0x01,
SUSPEND_RESUME = 0x02,
+ SIMPLE_READ = 0x03,
};
struct intel_wa_reg {
@@ -58,25 +59,20 @@ struct intel_wa_reg {
uint32_t mask;
};
-int drm_fd;
-uint32_t devid;
+static int drm_fd;
+static uint32_t devid;
static drm_intel_bufmgr *bufmgr;
struct intel_batchbuffer *batch;
-int num_wa_regs;
-struct intel_wa_reg *wa_regs;
+static int num_wa_regs;
+static struct intel_wa_reg *wa_regs;
-static void test_hang_gpu(void)
+static void wait_gpu(void)
{
- int retry_count = 30;
- enum stop_ring_flags flags;
struct drm_i915_gem_execbuffer2 execbuf;
struct drm_i915_gem_exec_object2 gem_exec;
uint32_t b[2] = {MI_BATCH_BUFFER_END};
- igt_assert(retry_count);
- igt_set_stop_rings(STOP_RING_DEFAULTS);
-
memset(&gem_exec, 0, sizeof(gem_exec));
gem_exec.handle = gem_create(drm_fd, 4096);
gem_write(drm_fd, gem_exec.handle, 0, b, sizeof(b));
@@ -88,6 +84,21 @@ static void test_hang_gpu(void)
drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
+ gem_sync(drm_fd, gem_exec.handle);
+
+ gem_close(drm_fd, gem_exec.handle);
+}
+
+static void test_hang_gpu(void)
+{
+ int retry_count = 30;
+ enum stop_ring_flags flags;
+
+ igt_assert(retry_count);
+ igt_set_stop_rings(STOP_RING_DEFAULTS);
+
+ wait_gpu();
+
while(retry_count--) {
flags = igt_get_stop_rings();
if (flags == 0)
@@ -107,30 +118,40 @@ static void test_suspend_resume(void)
igt_system_suspend_autoresume();
}
-static void get_current_wa_data(struct intel_wa_reg **curr, int num)
+static int workaround_fail_count(void)
{
- int i;
- struct intel_wa_reg *ptr = NULL;
-
- ptr = *curr;
+ int i, fail_count = 0;
intel_register_access_init(intel_get_pci_device(), 0);
- for (i = 0; i < num; ++i) {
- ptr[i].addr = wa_regs[i].addr;
- ptr[i].value = intel_register_read(wa_regs[i].addr);
- ptr[i].mask = wa_regs[i].mask;
+ igt_debug("Address\tval\t\tmask\t\tread\t\tresult\n");
+
+ for (i = 0; i < num_wa_regs; ++i) {
+ const uint32_t val = intel_register_read(wa_regs[i].addr);
+ const bool ok = (wa_regs[i].value & wa_regs[i].mask) ==
+ (val & wa_regs[i].mask);
+
+ igt_debug("0x%05X\t0x%08X\t0x%08X\t0x%08X\t%s\n",
+ wa_regs[i].addr, wa_regs[i].value, wa_regs[i].mask,
+ val, ok ? "OK" : "FAIL");
+
+ if (!ok) {
+ igt_warn("0x%05X\t0x%08X\t0x%08X\t0x%08X\t%s\n",
+ wa_regs[i].addr, wa_regs[i].value,
+ wa_regs[i].mask,
+ val, ok ? "OK" : "FAIL");
+ fail_count++;
+ }
}
intel_register_access_fini();
+
+ return fail_count;
}
-static void check_workarounds(enum operation op, int num)
+static void check_workarounds(enum operation op)
{
- int i;
- int fail_count = 0;
- int status = 0;
- struct intel_wa_reg *current_wa = NULL;
+ igt_assert(workaround_fail_count() == 0);
switch (op) {
case GPU_RESET:
@@ -141,31 +162,14 @@ static void check_workarounds(enum operation op, int num)
test_suspend_resume();
break;
- default:
- fail_count = 1;
- goto out;
- }
+ case SIMPLE_READ:
+ return;
- current_wa = malloc(num * sizeof(*current_wa));
- igt_assert(current_wa);
- get_current_wa_data(&current_wa, num);
-
- igt_info("Address\tbefore\t\tafter\t\tw/a mask\tresult\n");
- for (i = 0; i < num; ++i) {
- status = (current_wa[i].value & current_wa[i].mask) !=
- (wa_regs[i].value & wa_regs[i].mask);
- if (status)
- ++fail_count;
-
- igt_info("0x%X\t0x%08X\t0x%08X\t0x%08X\t%s\n",
- current_wa[i].addr, wa_regs[i].value,
- current_wa[i].value, current_wa[i].mask,
- status ? "fail" : "success");
+ default:
+ igt_assert(0);
}
-out:
- free(current_wa);
- igt_assert(fail_count == 0);
+ igt_assert(workaround_fail_count() == 0);
}
igt_main
@@ -193,7 +197,13 @@ igt_main
ret = getline(&line, &line_size, file);
igt_assert(ret > 0);
sscanf(line, "Workarounds applied: %d", &num_wa_regs);
- igt_assert(num_wa_regs > 0);
+
+ if (IS_BROADWELL(devid) ||
+ IS_IVYBRIDGE(devid) ||
+ IS_CHERRYVIEW(devid))
+ igt_assert(num_wa_regs > 0);
+ else
+ igt_assert(num_wa_regs >= 0);
wa_regs = malloc(num_wa_regs * sizeof(*wa_regs));
@@ -210,19 +220,14 @@ igt_main
close(fd);
}
- igt_subtest("check-workaround-data-after-reset") {
- if (IS_BROADWELL(devid))
- check_workarounds(GPU_RESET, num_wa_regs);
- else
- igt_skip_on("No Workaround table available!!\n");
- }
+ igt_subtest("read")
+ check_workarounds(SIMPLE_READ);
- igt_subtest("check-workaround-data-after-suspend-resume") {
- if (IS_BROADWELL(devid))
- check_workarounds(SUSPEND_RESUME, num_wa_regs);
- else
- igt_skip_on("No Workaround table available!!\n");
- }
+ igt_subtest("reset")
+ check_workarounds(GPU_RESET);
+
+ igt_subtest("suspend-resume")
+ check_workarounds(SUSPEND_RESUME);
igt_fixture {
free(wa_regs);
--
1.9.1
^ permalink raw reply related [flat|nested] 17+ messages in thread