* [PATCH] drm/i915: Update WaFlushCoherentL3CacheLinesAtContextSwitch
@ 2015-07-03 13:27 Arun Siluvery
2015-07-03 15:42 ` Chris Wilson
2015-07-05 1:34 ` shuang.he
0 siblings, 2 replies; 11+ messages in thread
From: Arun Siluvery @ 2015-07-03 13:27 UTC (permalink / raw)
To: intel-gfx
In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after PIPE_CONTROL
instruction but there is a slight complication as this is applied in WA batch
where the values are only initialized once.
Dave identified an issue with the current implementation where the register value
is read once at the beginning and it is reused; this patch corrects this by saving
the register value to memory, update register with the bit of our interest and
restore it back with original value.
This implementation uses MI_LOAD_REGISTER_MEM which is currently only used
by command parser and was using a default length of 0. This is now updated
with correct length and moved to appropriate place.
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dave Gordon <david.s.gordon@intel.com>
Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
---
drivers/gpu/drm/i915/i915_cmd_parser.c | 6 +--
drivers/gpu/drm/i915/i915_reg.h | 3 +-
drivers/gpu/drm/i915/intel_lrc.c | 72 +++++++++++++++++++++++++---------
3 files changed, 58 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 306d9e4..430571b 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -131,7 +131,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
.mask = MI_GLOBAL_GTT,
.expected = 0,
}}, ),
- CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W | B,
+ CMD( MI_LOAD_REGISTER_MEM(1), SMI, !F, 0xFF, W | B,
.reg = { .offset = 1, .mask = 0x007FFFFC },
.bits = {{
.offset = 0,
@@ -1021,7 +1021,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
* only MI_LOAD_REGISTER_IMM commands.
*/
if (reg_addr == OACONTROL) {
- if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
+ if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
DRM_DEBUG_DRIVER("CMD: Rejected LRM to OACONTROL\n");
return false;
}
@@ -1035,7 +1035,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
* allowed mask/value pair given in the whitelist entry.
*/
if (reg->mask) {
- if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
+ if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
DRM_DEBUG_DRIVER("CMD: Rejected LRM to masked register 0x%08X\n",
reg_addr);
return false;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index fa9ccb87..1d1cbe0 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -352,6 +352,8 @@
#define MI_INVALIDATE_BSD (1<<7)
#define MI_FLUSH_DW_USE_GTT (1<<2)
#define MI_FLUSH_DW_USE_PPGTT (0<<2)
+#define MI_LOAD_REGISTER_MEM(x) MI_INSTR(0x29, 2*(x)-1)
+#define MI_LOAD_REGISTER_MEM_GEN8(x) MI_INSTR(0x29, 3*(x)-1)
#define MI_BATCH_BUFFER MI_INSTR(0x30, 1)
#define MI_BATCH_NON_SECURE (1)
/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
@@ -456,7 +458,6 @@
#define MI_CLFLUSH MI_INSTR(0x27, 0)
#define MI_REPORT_PERF_COUNT MI_INSTR(0x28, 0)
#define MI_REPORT_PERF_COUNT_GGTT (1<<0)
-#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 0)
#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 0)
#define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0)
#define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5298103..23ff018 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1095,6 +1095,56 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
batch[index++] = (cmd); \
} while (0)
+
+/*
+ * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
+ * PIPE_CONTROL instruction. This is required for the flush to happen correctly
+ * but there is a slight complication as this is applied in WA batch where the
+ * values are only initialized once so we cannot take register value at the
+ * beginning and reuse it further; hence we save its value to memory, upload a
+ * constant value with bit21 set and then we restore it back with the saved value.
+ * To simplify the WA, a constant value is formed by using the default value
+ * of this register. This shouldn't be a problem because we are only modifying
+ * it for a short period and this batch in non-premptible. We can ofcourse
+ * use additional instructions that read the actual value of the register
+ * at that time and set our bit of interest but it makes the WA complicated.
+ *
+ * This WA is also required for Gen9 so extracting as a function avoids
+ * code duplication.
+ */
+static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *ring,
+ uint32_t *const batch,
+ uint32_t index)
+{
+ uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES);
+
+ wa_ctx_emit(batch, (MI_STORE_REGISTER_MEM_GEN8(1) |
+ MI_SRM_LRM_GLOBAL_GTT));
+ wa_ctx_emit(batch, GEN8_L3SQCREG4);
+ wa_ctx_emit(batch, ring->scratch.gtt_offset + 256);
+ wa_ctx_emit(batch, 0);
+
+ wa_ctx_emit(batch, MI_LOAD_REGISTER_IMM(1));
+ wa_ctx_emit(batch, GEN8_L3SQCREG4);
+ wa_ctx_emit(batch, l3sqc4_flush);
+
+ wa_ctx_emit(batch, GFX_OP_PIPE_CONTROL(6));
+ wa_ctx_emit(batch, (PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_DC_FLUSH_ENABLE));
+ wa_ctx_emit(batch, 0);
+ wa_ctx_emit(batch, 0);
+ wa_ctx_emit(batch, 0);
+ wa_ctx_emit(batch, 0);
+
+ wa_ctx_emit(batch, (MI_LOAD_REGISTER_MEM_GEN8(1) |
+ MI_SRM_LRM_GLOBAL_GTT));
+ wa_ctx_emit(batch, GEN8_L3SQCREG4);
+ wa_ctx_emit(batch, ring->scratch.gtt_offset + 256);
+ wa_ctx_emit(batch, 0);
+
+ return index;
+}
+
static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx,
uint32_t offset,
uint32_t start_alignment)
@@ -1155,25 +1205,9 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
if (IS_BROADWELL(ring->dev)) {
- struct drm_i915_private *dev_priv = to_i915(ring->dev);
- uint32_t l3sqc4_flush = (I915_READ(GEN8_L3SQCREG4) |
- GEN8_LQSC_FLUSH_COHERENT_LINES);
-
- wa_ctx_emit(batch, MI_LOAD_REGISTER_IMM(1));
- wa_ctx_emit(batch, GEN8_L3SQCREG4);
- wa_ctx_emit(batch, l3sqc4_flush);
-
- wa_ctx_emit(batch, GFX_OP_PIPE_CONTROL(6));
- wa_ctx_emit(batch, (PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_DC_FLUSH_ENABLE));
- wa_ctx_emit(batch, 0);
- wa_ctx_emit(batch, 0);
- wa_ctx_emit(batch, 0);
- wa_ctx_emit(batch, 0);
-
- wa_ctx_emit(batch, MI_LOAD_REGISTER_IMM(1));
- wa_ctx_emit(batch, GEN8_L3SQCREG4);
- wa_ctx_emit(batch, l3sqc4_flush & ~GEN8_LQSC_FLUSH_COHERENT_LINES);
+ index = gen8_emit_flush_coherentl3_wa(ring, batch, index);
+ if (index < 0)
+ return index;
}
/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 11+ messages in thread* Re: [PATCH] drm/i915: Update WaFlushCoherentL3CacheLinesAtContextSwitch
2015-07-03 13:27 [PATCH] drm/i915: Update WaFlushCoherentL3CacheLinesAtContextSwitch Arun Siluvery
@ 2015-07-03 15:42 ` Chris Wilson
2015-07-06 11:52 ` Dave Gordon
2015-07-05 1:34 ` shuang.he
1 sibling, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2015-07-03 15:42 UTC (permalink / raw)
To: Arun Siluvery; +Cc: intel-gfx
On Fri, Jul 03, 2015 at 02:27:31PM +0100, Arun Siluvery wrote:
> In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after PIPE_CONTROL
> instruction but there is a slight complication as this is applied in WA batch
> where the values are only initialized once.
> Dave identified an issue with the current implementation where the register value
> is read once at the beginning and it is reused; this patch corrects this by saving
> the register value to memory, update register with the bit of our interest and
> restore it back with original value.
>
> This implementation uses MI_LOAD_REGISTER_MEM which is currently only used
> by command parser and was using a default length of 0. This is now updated
> with correct length and moved to appropriate place.
>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Dave Gordon <david.s.gordon@intel.com>
> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_cmd_parser.c | 6 +--
> drivers/gpu/drm/i915/i915_reg.h | 3 +-
> drivers/gpu/drm/i915/intel_lrc.c | 72 +++++++++++++++++++++++++---------
> 3 files changed, 58 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
> index 306d9e4..430571b 100644
> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
> @@ -131,7 +131,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
> .mask = MI_GLOBAL_GTT,
> .expected = 0,
> }}, ),
> - CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W | B,
> + CMD( MI_LOAD_REGISTER_MEM(1), SMI, !F, 0xFF, W | B,
> .reg = { .offset = 1, .mask = 0x007FFFFC },
> .bits = {{
> .offset = 0,
> @@ -1021,7 +1021,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
> * only MI_LOAD_REGISTER_IMM commands.
> */
> if (reg_addr == OACONTROL) {
> - if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
> + if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
I had a double take here, but it all comes out in the wash. For one
moment, I thought the cmd matching had changed, but that has the length
masked out.
Reviewed-by: Chris Wilson <chris@cris-wilson.co.uk>
Who will start to complain about all the extra frequent register writes,
probably into common power wells....
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH] drm/i915: Update WaFlushCoherentL3CacheLinesAtContextSwitch
2015-07-03 15:42 ` Chris Wilson
@ 2015-07-06 11:52 ` Dave Gordon
2015-07-06 12:24 ` Siluvery, Arun
2015-07-06 12:38 ` Daniel Vetter
0 siblings, 2 replies; 11+ messages in thread
From: Dave Gordon @ 2015-07-06 11:52 UTC (permalink / raw)
To: Chris Wilson, Arun Siluvery, intel-gfx
On 03/07/15 16:42, Chris Wilson wrote:
> On Fri, Jul 03, 2015 at 02:27:31PM +0100, Arun Siluvery wrote:
>> In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after PIPE_CONTROL
>> instruction but there is a slight complication as this is applied in WA batch
>> where the values are only initialized once.
>> Dave identified an issue with the current implementation where the register value
>> is read once at the beginning and it is reused; this patch corrects this by saving
>> the register value to memory, update register with the bit of our interest and
>> restore it back with original value.
>>
>> This implementation uses MI_LOAD_REGISTER_MEM which is currently only used
>> by command parser and was using a default length of 0. This is now updated
>> with correct length and moved to appropriate place.
>>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Dave Gordon <david.s.gordon@intel.com>
>> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_cmd_parser.c | 6 +--
>> drivers/gpu/drm/i915/i915_reg.h | 3 +-
>> drivers/gpu/drm/i915/intel_lrc.c | 72 +++++++++++++++++++++++++---------
>> 3 files changed, 58 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
>> index 306d9e4..430571b 100644
>> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
>> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
>> @@ -131,7 +131,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
>> .mask = MI_GLOBAL_GTT,
>> .expected = 0,
>> }}, ),
>> - CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W | B,
>> + CMD( MI_LOAD_REGISTER_MEM(1), SMI, !F, 0xFF, W | B,
>> .reg = { .offset = 1, .mask = 0x007FFFFC },
>> .bits = {{
>> .offset = 0,
>> @@ -1021,7 +1021,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
>> * only MI_LOAD_REGISTER_IMM commands.
>> */
>> if (reg_addr == OACONTROL) {
>> - if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
>> + if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
>
> I had a double take here, but it all comes out in the wash. For one
> moment, I thought the cmd matching had changed, but that has the length
> masked out.
>
> Reviewed-by: Chris Wilson <chris@cris-wilson.co.uk>
>
> Who will start to complain about all the extra frequent register writes,
> probably into common power wells....
> -Chris
Hmm ... that is quite confusing, especially as the actual opcode in the
instruction stream will be MI_LOAD_REGISTER_MEM(2) on GEN8+. It might
almost be better to use MI_LOAD_REGISTER_MEM(0) to emphasise that the
length field is a wildcard and not something that will be matched exactly.
.Dave.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH] drm/i915: Update WaFlushCoherentL3CacheLinesAtContextSwitch
2015-07-06 11:52 ` Dave Gordon
@ 2015-07-06 12:24 ` Siluvery, Arun
2015-07-06 12:38 ` Daniel Vetter
1 sibling, 0 replies; 11+ messages in thread
From: Siluvery, Arun @ 2015-07-06 12:24 UTC (permalink / raw)
To: Dave Gordon, Chris Wilson, intel-gfx
On 06/07/2015 12:52, Dave Gordon wrote:
> On 03/07/15 16:42, Chris Wilson wrote:
>> On Fri, Jul 03, 2015 at 02:27:31PM +0100, Arun Siluvery wrote:
>>> In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after PIPE_CONTROL
>>> instruction but there is a slight complication as this is applied in WA batch
>>> where the values are only initialized once.
>>> Dave identified an issue with the current implementation where the register value
>>> is read once at the beginning and it is reused; this patch corrects this by saving
>>> the register value to memory, update register with the bit of our interest and
>>> restore it back with original value.
>>>
>>> This implementation uses MI_LOAD_REGISTER_MEM which is currently only used
>>> by command parser and was using a default length of 0. This is now updated
>>> with correct length and moved to appropriate place.
>>>
>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Dave Gordon <david.s.gordon@intel.com>
>>> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
>>> ---
>>> drivers/gpu/drm/i915/i915_cmd_parser.c | 6 +--
>>> drivers/gpu/drm/i915/i915_reg.h | 3 +-
>>> drivers/gpu/drm/i915/intel_lrc.c | 72 +++++++++++++++++++++++++---------
>>> 3 files changed, 58 insertions(+), 23 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
>>> index 306d9e4..430571b 100644
>>> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
>>> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
>>> @@ -131,7 +131,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
>>> .mask = MI_GLOBAL_GTT,
>>> .expected = 0,
>>> }}, ),
>>> - CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W | B,
>>> + CMD( MI_LOAD_REGISTER_MEM(1), SMI, !F, 0xFF, W | B,
>>> .reg = { .offset = 1, .mask = 0x007FFFFC },
>>> .bits = {{
>>> .offset = 0,
>>> @@ -1021,7 +1021,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
>>> * only MI_LOAD_REGISTER_IMM commands.
>>> */
>>> if (reg_addr == OACONTROL) {
>>> - if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
>>> + if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
>>
>> I had a double take here, but it all comes out in the wash. For one
>> moment, I thought the cmd matching had changed, but that has the length
>> masked out.
>>
>> Reviewed-by: Chris Wilson <chris@cris-wilson.co.uk>
>>
>> Who will start to complain about all the extra frequent register writes,
>> probably into common power wells....
>> -Chris
>
> Hmm ... that is quite confusing, especially as the actual opcode in the
> instruction stream will be MI_LOAD_REGISTER_MEM(2) on GEN8+. It might
true, but cmd parser is only upto GEN7.
regards
Arun
> almost be better to use MI_LOAD_REGISTER_MEM(0) to emphasise that the
> length field is a wildcard and not something that will be matched exactly.
>
> .Dave.
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH] drm/i915: Update WaFlushCoherentL3CacheLinesAtContextSwitch
2015-07-06 11:52 ` Dave Gordon
2015-07-06 12:24 ` Siluvery, Arun
@ 2015-07-06 12:38 ` Daniel Vetter
2015-07-06 13:16 ` Dave Gordon
1 sibling, 1 reply; 11+ messages in thread
From: Daniel Vetter @ 2015-07-06 12:38 UTC (permalink / raw)
To: Dave Gordon; +Cc: intel-gfx
On Mon, Jul 06, 2015 at 12:52:51PM +0100, Dave Gordon wrote:
> On 03/07/15 16:42, Chris Wilson wrote:
> >On Fri, Jul 03, 2015 at 02:27:31PM +0100, Arun Siluvery wrote:
> >>In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after PIPE_CONTROL
> >>instruction but there is a slight complication as this is applied in WA batch
> >>where the values are only initialized once.
> >>Dave identified an issue with the current implementation where the register value
> >>is read once at the beginning and it is reused; this patch corrects this by saving
> >>the register value to memory, update register with the bit of our interest and
> >>restore it back with original value.
> >>
> >>This implementation uses MI_LOAD_REGISTER_MEM which is currently only used
> >>by command parser and was using a default length of 0. This is now updated
> >>with correct length and moved to appropriate place.
> >>
> >>Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >>Cc: Dave Gordon <david.s.gordon@intel.com>
> >>Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
> >>---
> >> drivers/gpu/drm/i915/i915_cmd_parser.c | 6 +--
> >> drivers/gpu/drm/i915/i915_reg.h | 3 +-
> >> drivers/gpu/drm/i915/intel_lrc.c | 72 +++++++++++++++++++++++++---------
> >> 3 files changed, 58 insertions(+), 23 deletions(-)
> >>
> >>diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
> >>index 306d9e4..430571b 100644
> >>--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
> >>+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
> >>@@ -131,7 +131,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
> >> .mask = MI_GLOBAL_GTT,
> >> .expected = 0,
> >> }}, ),
> >>- CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W | B,
> >>+ CMD( MI_LOAD_REGISTER_MEM(1), SMI, !F, 0xFF, W | B,
> >> .reg = { .offset = 1, .mask = 0x007FFFFC },
> >> .bits = {{
> >> .offset = 0,
> >>@@ -1021,7 +1021,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
> >> * only MI_LOAD_REGISTER_IMM commands.
> >> */
> >> if (reg_addr == OACONTROL) {
> >>- if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
> >>+ if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
> >
> >I had a double take here, but it all comes out in the wash. For one
> >moment, I thought the cmd matching had changed, but that has the length
> >masked out.
> >
> >Reviewed-by: Chris Wilson <chris@cris-wilson.co.uk>
Queued for -next, thanks for the patch.
> >Who will start to complain about all the extra frequent register writes,
> >probably into common power wells....
> >-Chris
>
> Hmm ... that is quite confusing, especially as the actual opcode in the
> instruction stream will be MI_LOAD_REGISTER_MEM(2) on GEN8+. It might almost
> be better to use MI_LOAD_REGISTER_MEM(0) to emphasise that the length field
> is a wildcard and not something that will be matched exactly.
There's a separate _GEN8 #define to accomodate the differences, so I don't
fully understand your concern. We also don't do any decoding in the kernel
...
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH] drm/i915: Update WaFlushCoherentL3CacheLinesAtContextSwitch
2015-07-06 12:38 ` Daniel Vetter
@ 2015-07-06 13:16 ` Dave Gordon
2015-07-06 14:33 ` Daniel Vetter
0 siblings, 1 reply; 11+ messages in thread
From: Dave Gordon @ 2015-07-06 13:16 UTC (permalink / raw)
To: Daniel Vetter; +Cc: intel-gfx
On 06/07/15 13:38, Daniel Vetter wrote:
> On Mon, Jul 06, 2015 at 12:52:51PM +0100, Dave Gordon wrote:
>> On 03/07/15 16:42, Chris Wilson wrote:
>>> On Fri, Jul 03, 2015 at 02:27:31PM +0100, Arun Siluvery wrote:
>>>> In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after PIPE_CONTROL
>>>> instruction but there is a slight complication as this is applied in WA batch
>>>> where the values are only initialized once.
>>>> Dave identified an issue with the current implementation where the register value
>>>> is read once at the beginning and it is reused; this patch corrects this by saving
>>>> the register value to memory, update register with the bit of our interest and
>>>> restore it back with original value.
>>>>
>>>> This implementation uses MI_LOAD_REGISTER_MEM which is currently only used
>>>> by command parser and was using a default length of 0. This is now updated
>>>> with correct length and moved to appropriate place.
>>>>
>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>> Cc: Dave Gordon <david.s.gordon@intel.com>
>>>> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
>>>> ---
>>>> drivers/gpu/drm/i915/i915_cmd_parser.c | 6 +--
>>>> drivers/gpu/drm/i915/i915_reg.h | 3 +-
>>>> drivers/gpu/drm/i915/intel_lrc.c | 72 +++++++++++++++++++++++++---------
>>>> 3 files changed, 58 insertions(+), 23 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
>>>> index 306d9e4..430571b 100644
>>>> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
>>>> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
>>>> @@ -1021,7 +1021,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
>>>> * only MI_LOAD_REGISTER_IMM commands.
>>>> */
>>>> if (reg_addr == OACONTROL) {
>>>> - if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
>>>> + if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
>>>
>>> I had a double take here, but it all comes out in the wash. For one
>>> moment, I thought the cmd matching had changed, but that has the length
>>> masked out.
>>>
>>> Reviewed-by: Chris Wilson <chris@cris-wilson.co.uk>
>
> Queued for -next, thanks for the patch.
>
>>> Who will start to complain about all the extra frequent register writes,
>>> probably into common power wells....
>>> -Chris
>>
>> Hmm ... that is quite confusing, especially as the actual opcode in the
>> instruction stream will be MI_LOAD_REGISTER_MEM(2) on GEN8+. It might almost
>> be better to use MI_LOAD_REGISTER_MEM(0) to emphasise that the length field
>> is a wildcard and not something that will be matched exactly.
>
> There's a separate _GEN8 #define to accomodate the differences, so I don't
> fully understand your concern. We also don't do any decoding in the kernel
> ...
> -Daniel
In the snippet:
>> - CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W | B,
>> + CMD( MI_LOAD_REGISTER_MEM(1), SMI, !F, 0xFF, W | B,
the (1) goes in the table but is ignored when matching instructions in
the stream being parsed. It could just as well be (2) or (0) or (255).
Then, in the test:
>> - if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
>> + if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
the thing on the left of the == is not the instruction being examined,
but the entry in the table that matched that instruction. So here also
we're not really using the length field, EXCEPT that it MUST be the same
as the (arbitrary) value in the table.
So my concern here was not about correctness but comprehensibility and
hence maintainability -- after all, if Chris had to look twice it
obviously isn't as clear as one would like!
My suggestion was that maybe the "ignored" length field should be 0 to
make it less likely that a reader would think this matches exactly (and
only) an opcode of 0xa400001. Or maybe (255) would be even more
obviously not-a-literal-match?
.Dave.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH] drm/i915: Update WaFlushCoherentL3CacheLinesAtContextSwitch
2015-07-06 13:16 ` Dave Gordon
@ 2015-07-06 14:33 ` Daniel Vetter
2015-07-06 15:25 ` Dave Gordon
2015-07-06 15:41 ` Chris Wilson
0 siblings, 2 replies; 11+ messages in thread
From: Daniel Vetter @ 2015-07-06 14:33 UTC (permalink / raw)
To: Dave Gordon; +Cc: intel-gfx
On Mon, Jul 06, 2015 at 02:16:54PM +0100, Dave Gordon wrote:
> On 06/07/15 13:38, Daniel Vetter wrote:
> >On Mon, Jul 06, 2015 at 12:52:51PM +0100, Dave Gordon wrote:
> >>On 03/07/15 16:42, Chris Wilson wrote:
> >>>On Fri, Jul 03, 2015 at 02:27:31PM +0100, Arun Siluvery wrote:
> >>>>In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after PIPE_CONTROL
> >>>>instruction but there is a slight complication as this is applied in WA batch
> >>>>where the values are only initialized once.
> >>>>Dave identified an issue with the current implementation where the register value
> >>>>is read once at the beginning and it is reused; this patch corrects this by saving
> >>>>the register value to memory, update register with the bit of our interest and
> >>>>restore it back with original value.
> >>>>
> >>>>This implementation uses MI_LOAD_REGISTER_MEM which is currently only used
> >>>>by command parser and was using a default length of 0. This is now updated
> >>>>with correct length and moved to appropriate place.
> >>>>
> >>>>Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >>>>Cc: Dave Gordon <david.s.gordon@intel.com>
> >>>>Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
> >>>>---
> >>>> drivers/gpu/drm/i915/i915_cmd_parser.c | 6 +--
> >>>> drivers/gpu/drm/i915/i915_reg.h | 3 +-
> >>>> drivers/gpu/drm/i915/intel_lrc.c | 72 +++++++++++++++++++++++++---------
> >>>> 3 files changed, 58 insertions(+), 23 deletions(-)
> >>>>
> >>>>diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
> >>>>index 306d9e4..430571b 100644
> >>>>--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
> >>>>+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
>
> >>>>@@ -1021,7 +1021,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
> >>>> * only MI_LOAD_REGISTER_IMM commands.
> >>>> */
> >>>> if (reg_addr == OACONTROL) {
> >>>>- if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
> >>>>+ if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
> >>>
> >>>I had a double take here, but it all comes out in the wash. For one
> >>>moment, I thought the cmd matching had changed, but that has the length
> >>>masked out.
> >>>
> >>>Reviewed-by: Chris Wilson <chris@cris-wilson.co.uk>
> >
> >Queued for -next, thanks for the patch.
> >
> >>>Who will start to complain about all the extra frequent register writes,
> >>>probably into common power wells....
> >>>-Chris
> >>
> >>Hmm ... that is quite confusing, especially as the actual opcode in the
> >>instruction stream will be MI_LOAD_REGISTER_MEM(2) on GEN8+. It might almost
> >>be better to use MI_LOAD_REGISTER_MEM(0) to emphasise that the length field
> >>is a wildcard and not something that will be matched exactly.
> >
> >There's a separate _GEN8 #define to accomodate the differences, so I don't
> >fully understand your concern. We also don't do any decoding in the kernel
> >...
> >-Daniel
>
> In the snippet:
>
> >> - CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W | B,
> >> + CMD( MI_LOAD_REGISTER_MEM(1), SMI, !F, 0xFF, W | B,
>
> the (1) goes in the table but is ignored when matching instructions in the
> stream being parsed. It could just as well be (2) or (0) or (255).
>
> Then, in the test:
>
> >> - if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
> >> + if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
>
> the thing on the left of the == is not the instruction being examined, but
> the entry in the table that matched that instruction. So here also we're not
> really using the length field, EXCEPT that it MUST be the same as the
> (arbitrary) value in the table.
>
> So my concern here was not about correctness but comprehensibility and hence
> maintainability -- after all, if Chris had to look twice it obviously isn't
> as clear as one would like!
>
> My suggestion was that maybe the "ignored" length field should be 0 to make
> it less likely that a reader would think this matches exactly (and only) an
> opcode of 0xa400001. Or maybe (255) would be even more obviously
> not-a-literal-match?
Hm, given that the cmd parser is gen7 only I'm not too concerned about
this. It is indeed a bit surprising though, and I guess (0) would be less
surprising. Otoh other commands with a lenght field also use (1) in a
similar fashion, so at least this is consistent.
tbh no opinion here at all from my side, but happy to merge a fixup on top
to clarify this, if you can agree on a clear improvement.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH] drm/i915: Update WaFlushCoherentL3CacheLinesAtContextSwitch
2015-07-06 14:33 ` Daniel Vetter
@ 2015-07-06 15:25 ` Dave Gordon
2015-07-06 15:41 ` Chris Wilson
1 sibling, 0 replies; 11+ messages in thread
From: Dave Gordon @ 2015-07-06 15:25 UTC (permalink / raw)
To: Daniel Vetter; +Cc: intel-gfx
On 06/07/15 15:33, Daniel Vetter wrote:
> On Mon, Jul 06, 2015 at 02:16:54PM +0100, Dave Gordon wrote:
>> On 06/07/15 13:38, Daniel Vetter wrote:
>>> On Mon, Jul 06, 2015 at 12:52:51PM +0100, Dave Gordon wrote:
>>>> On 03/07/15 16:42, Chris Wilson wrote:
>>>>> On Fri, Jul 03, 2015 at 02:27:31PM +0100, Arun Siluvery wrote:
>>>>>> In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after PIPE_CONTROL
>>>>>> instruction but there is a slight complication as this is applied in WA batch
>>>>>> where the values are only initialized once.
>>>>>> Dave identified an issue with the current implementation where the register value
>>>>>> is read once at the beginning and it is reused; this patch corrects this by saving
>>>>>> the register value to memory, update register with the bit of our interest and
>>>>>> restore it back with original value.
>>>>>>
>>>>>> This implementation uses MI_LOAD_REGISTER_MEM which is currently only used
>>>>>> by command parser and was using a default length of 0. This is now updated
>>>>>> with correct length and moved to appropriate place.
>>>>>>
>>>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>>>> Cc: Dave Gordon <david.s.gordon@intel.com>
>>>>>> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
>>>>>> ---
>>>>>> drivers/gpu/drm/i915/i915_cmd_parser.c | 6 +--
>>>>>> drivers/gpu/drm/i915/i915_reg.h | 3 +-
>>>>>> drivers/gpu/drm/i915/intel_lrc.c | 72 +++++++++++++++++++++++++---------
>>>>>> 3 files changed, 58 insertions(+), 23 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
>>>>>> index 306d9e4..430571b 100644
>>>>>> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
>>>>>> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
>>
>>>>>> @@ -1021,7 +1021,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
>>>>>> * only MI_LOAD_REGISTER_IMM commands.
>>>>>> */
>>>>>> if (reg_addr == OACONTROL) {
>>>>>> - if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
>>>>>> + if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
>>>>>
>>>>> I had a double take here, but it all comes out in the wash. For one
>>>>> moment, I thought the cmd matching had changed, but that has the length
>>>>> masked out.
>>>>>
>>>>> Reviewed-by: Chris Wilson <chris@cris-wilson.co.uk>
>>>
>>> Queued for -next, thanks for the patch.
>>>
>>>>> Who will start to complain about all the extra frequent register writes,
>>>>> probably into common power wells....
>>>>> -Chris
>>>>
>>>> Hmm ... that is quite confusing, especially as the actual opcode in the
>>>> instruction stream will be MI_LOAD_REGISTER_MEM(2) on GEN8+. It might almost
>>>> be better to use MI_LOAD_REGISTER_MEM(0) to emphasise that the length field
>>>> is a wildcard and not something that will be matched exactly.
>>>
>>> There's a separate _GEN8 #define to accomodate the differences, so I don't
>>> fully understand your concern. We also don't do any decoding in the kernel
>>> ...
>>> -Daniel
>>
>> In the snippet:
>>
>>>> - CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W | B,
>>>> + CMD( MI_LOAD_REGISTER_MEM(1), SMI, !F, 0xFF, W | B,
>>
>> the (1) goes in the table but is ignored when matching instructions in the
>> stream being parsed. It could just as well be (2) or (0) or (255).
>>
>> Then, in the test:
>>
>>>> - if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
>>>> + if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
>>
>> the thing on the left of the == is not the instruction being examined, but
>> the entry in the table that matched that instruction. So here also we're not
>> really using the length field, EXCEPT that it MUST be the same as the
>> (arbitrary) value in the table.
>>
>> So my concern here was not about correctness but comprehensibility and hence
>> maintainability -- after all, if Chris had to look twice it obviously isn't
>> as clear as one would like!
>>
>> My suggestion was that maybe the "ignored" length field should be 0 to make
>> it less likely that a reader would think this matches exactly (and only) an
>> opcode of 0xa400001. Or maybe (255) would be even more obviously
>> not-a-literal-match?
>
> Hm, given that the cmd parser is gen7 only I'm not too concerned about
> this. It is indeed a bit surprising though, and I guess (0) would be less
> surprising. Otoh other commands with a lenght field also use (1) in a
> similar fashion, so at least this is consistent.
>
> tbh no opinion here at all from my side, but happy to merge a fixup on top
> to clarify this, if you can agree on a clear improvement.
> -Daniel
Hi Daniel,
Arun & I have just discussed this and we think we should go with this
as-is for now, and then fix all the places that have an opcode macro
with a redundant non-zero length in a separate patch.
Thanks,
.Dave.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH] drm/i915: Update WaFlushCoherentL3CacheLinesAtContextSwitch
2015-07-06 14:33 ` Daniel Vetter
2015-07-06 15:25 ` Dave Gordon
@ 2015-07-06 15:41 ` Chris Wilson
2015-07-10 15:24 ` Dave Gordon
1 sibling, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2015-07-06 15:41 UTC (permalink / raw)
To: Daniel Vetter; +Cc: intel-gfx
On Mon, Jul 06, 2015 at 04:33:05PM +0200, Daniel Vetter wrote:
> On Mon, Jul 06, 2015 at 02:16:54PM +0100, Dave Gordon wrote:
> > On 06/07/15 13:38, Daniel Vetter wrote:
> > >On Mon, Jul 06, 2015 at 12:52:51PM +0100, Dave Gordon wrote:
> > >>On 03/07/15 16:42, Chris Wilson wrote:
> > >>>On Fri, Jul 03, 2015 at 02:27:31PM +0100, Arun Siluvery wrote:
> > >>>>In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after PIPE_CONTROL
> > >>>>instruction but there is a slight complication as this is applied in WA batch
> > >>>>where the values are only initialized once.
> > >>>>Dave identified an issue with the current implementation where the register value
> > >>>>is read once at the beginning and it is reused; this patch corrects this by saving
> > >>>>the register value to memory, update register with the bit of our interest and
> > >>>>restore it back with original value.
> > >>>>
> > >>>>This implementation uses MI_LOAD_REGISTER_MEM which is currently only used
> > >>>>by command parser and was using a default length of 0. This is now updated
> > >>>>with correct length and moved to appropriate place.
> > >>>>
> > >>>>Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > >>>>Cc: Dave Gordon <david.s.gordon@intel.com>
> > >>>>Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
> > >>>>---
> > >>>> drivers/gpu/drm/i915/i915_cmd_parser.c | 6 +--
> > >>>> drivers/gpu/drm/i915/i915_reg.h | 3 +-
> > >>>> drivers/gpu/drm/i915/intel_lrc.c | 72 +++++++++++++++++++++++++---------
> > >>>> 3 files changed, 58 insertions(+), 23 deletions(-)
> > >>>>
> > >>>>diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
> > >>>>index 306d9e4..430571b 100644
> > >>>>--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
> > >>>>+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
> >
> > >>>>@@ -1021,7 +1021,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
> > >>>> * only MI_LOAD_REGISTER_IMM commands.
> > >>>> */
> > >>>> if (reg_addr == OACONTROL) {
> > >>>>- if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
> > >>>>+ if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
> > >>>
> > >>>I had a double take here, but it all comes out in the wash. For one
> > >>>moment, I thought the cmd matching had changed, but that has the length
> > >>>masked out.
> > >>>
> > >>>Reviewed-by: Chris Wilson <chris@cris-wilson.co.uk>
> > >
> > >Queued for -next, thanks for the patch.
> > >
> > >>>Who will start to complain about all the extra frequent register writes,
> > >>>probably into common power wells....
> > >>>-Chris
> > >>
> > >>Hmm ... that is quite confusing, especially as the actual opcode in the
> > >>instruction stream will be MI_LOAD_REGISTER_MEM(2) on GEN8+. It might almost
> > >>be better to use MI_LOAD_REGISTER_MEM(0) to emphasise that the length field
> > >>is a wildcard and not something that will be matched exactly.
> > >
> > >There's a separate _GEN8 #define to accomodate the differences, so I don't
> > >fully understand your concern. We also don't do any decoding in the kernel
> > >...
> > >-Daniel
> >
> > In the snippet:
> >
> > >> - CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W | B,
> > >> + CMD( MI_LOAD_REGISTER_MEM(1), SMI, !F, 0xFF, W | B,
> >
> > the (1) goes in the table but is ignored when matching instructions in the
> > stream being parsed. It could just as well be (2) or (0) or (255).
> >
> > Then, in the test:
> >
> > >> - if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
> > >> + if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
> >
> > the thing on the left of the == is not the instruction being examined, but
> > the entry in the table that matched that instruction. So here also we're not
> > really using the length field, EXCEPT that it MUST be the same as the
> > (arbitrary) value in the table.
> >
> > So my concern here was not about correctness but comprehensibility and hence
> > maintainability -- after all, if Chris had to look twice it obviously isn't
> > as clear as one would like!
> >
> > My suggestion was that maybe the "ignored" length field should be 0 to make
> > it less likely that a reader would think this matches exactly (and only) an
> > opcode of 0xa400001. Or maybe (255) would be even more obviously
> > not-a-literal-match?
>
> Hm, given that the cmd parser is gen7 only I'm not too concerned about
> this. It is indeed a bit surprising though, and I guess (0) would be less
> surprising. Otoh other commands with a lenght field also use (1) in a
> similar fashion, so at least this is consistent.
>
> tbh no opinion here at all from my side, but happy to merge a fixup on top
> to clarify this, if you can agree on a clear improvement.
iirc, the #define used CMD | (2*(x)-1), which blows up if passed in 0.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH] drm/i915: Update WaFlushCoherentL3CacheLinesAtContextSwitch
2015-07-06 15:41 ` Chris Wilson
@ 2015-07-10 15:24 ` Dave Gordon
0 siblings, 0 replies; 11+ messages in thread
From: Dave Gordon @ 2015-07-10 15:24 UTC (permalink / raw)
To: Chris Wilson, Daniel Vetter, Arun Siluvery, intel-gfx
On 06/07/15 16:41, Chris Wilson wrote:
> On Mon, Jul 06, 2015 at 04:33:05PM +0200, Daniel Vetter wrote:
>> On Mon, Jul 06, 2015 at 02:16:54PM +0100, Dave Gordon wrote:
>>> On 06/07/15 13:38, Daniel Vetter wrote:
>>>> On Mon, Jul 06, 2015 at 12:52:51PM +0100, Dave Gordon wrote:
>>>>> On 03/07/15 16:42, Chris Wilson wrote:
>>>>>> On Fri, Jul 03, 2015 at 02:27:31PM +0100, Arun Siluvery wrote:
>>>>>>> In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after PIPE_CONTROL
>>>>>>> instruction but there is a slight complication as this is applied in WA batch
>>>>>>> where the values are only initialized once.
>>>>>>> Dave identified an issue with the current implementation where the register value
>>>>>>> is read once at the beginning and it is reused; this patch corrects this by saving
>>>>>>> the register value to memory, update register with the bit of our interest and
>>>>>>> restore it back with original value.
>>>>>>>
>>>>>>> This implementation uses MI_LOAD_REGISTER_MEM which is currently only used
>>>>>>> by command parser and was using a default length of 0. This is now updated
>>>>>>> with correct length and moved to appropriate place.
>>>>>>>
>>>>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>>>>> Cc: Dave Gordon <david.s.gordon@intel.com>
>>>>>>> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
>>>>>>> ---
>>>>>>> drivers/gpu/drm/i915/i915_cmd_parser.c | 6 +--
>>>>>>> drivers/gpu/drm/i915/i915_reg.h | 3 +-
>>>>>>> drivers/gpu/drm/i915/intel_lrc.c | 72 +++++++++++++++++++++++++---------
>>>>>>> 3 files changed, 58 insertions(+), 23 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
>>>>>>> index 306d9e4..430571b 100644
>>>>>>> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
>>>>>>> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
>>>
>>>>>>> @@ -1021,7 +1021,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
>>>>>>> * only MI_LOAD_REGISTER_IMM commands.
>>>>>>> */
>>>>>>> if (reg_addr == OACONTROL) {
>>>>>>> - if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
>>>>>>> + if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
>>>>>>
>>>>>> I had a double take here, but it all comes out in the wash. For one
>>>>>> moment, I thought the cmd matching had changed, but that has the length
>>>>>> masked out.
>>>>>>
>>>>>> Reviewed-by: Chris Wilson <chris@cris-wilson.co.uk>
>>>>
>>>> Queued for -next, thanks for the patch.
>>>>
>>>>>> Who will start to complain about all the extra frequent register writes,
>>>>>> probably into common power wells....
>>>>>> -Chris
>>>>>
>>>>> Hmm ... that is quite confusing, especially as the actual opcode in the
>>>>> instruction stream will be MI_LOAD_REGISTER_MEM(2) on GEN8+. It might almost
>>>>> be better to use MI_LOAD_REGISTER_MEM(0) to emphasise that the length field
>>>>> is a wildcard and not something that will be matched exactly.
>>>>
>>>> There's a separate _GEN8 #define to accomodate the differences, so I don't
>>>> fully understand your concern. We also don't do any decoding in the kernel
>>>> ...
>>>> -Daniel
>>>
>>> In the snippet:
>>>
>>>>> - CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W | B,
>>>>> + CMD( MI_LOAD_REGISTER_MEM(1), SMI, !F, 0xFF, W | B,
>>>
>>> the (1) goes in the table but is ignored when matching instructions in the
>>> stream being parsed. It could just as well be (2) or (0) or (255).
>>>
>>> Then, in the test:
>>>
>>>>> - if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
>>>>> + if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
>>>
>>> the thing on the left of the == is not the instruction being examined, but
>>> the entry in the table that matched that instruction. So here also we're not
>>> really using the length field, EXCEPT that it MUST be the same as the
>>> (arbitrary) value in the table.
>>>
>>> So my concern here was not about correctness but comprehensibility and hence
>>> maintainability -- after all, if Chris had to look twice it obviously isn't
>>> as clear as one would like!
>>>
>>> My suggestion was that maybe the "ignored" length field should be 0 to make
>>> it less likely that a reader would think this matches exactly (and only) an
>>> opcode of 0xa400001. Or maybe (255) would be even more obviously
>>> not-a-literal-match?
>>
>> Hm, given that the cmd parser is gen7 only I'm not too concerned about
>> this. It is indeed a bit surprising though, and I guess (0) would be less
>> surprising. Otoh other commands with a lenght field also use (1) in a
>> similar fashion, so at least this is consistent.
>>
>> tbh no opinion here at all from my side, but happy to merge a fixup on top
>> to clarify this, if you can agree on a clear improvement.
>
> iirc, the #define used CMD | (2*(x)-1), which blows up if passed in 0.
> -Chris
Yes; and furthermore the MI_LOAD_REGISTER_MEM instruction really doesn't
take a list of (register,address) pairs; unlike
MI_LOAD_REGISTER_IMMEDIATE it can only load ONE register from ONE memory
address. So the (x) is completely bogus anyway, and it should revert to
just:
#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1)
#define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2)
AFAICS LRI is the only CS register-manipulating opcode that actually
supports a variable-length list?
.Dave.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] drm/i915: Update WaFlushCoherentL3CacheLinesAtContextSwitch
2015-07-03 13:27 [PATCH] drm/i915: Update WaFlushCoherentL3CacheLinesAtContextSwitch Arun Siluvery
2015-07-03 15:42 ` Chris Wilson
@ 2015-07-05 1:34 ` shuang.he
1 sibling, 0 replies; 11+ messages in thread
From: shuang.he @ 2015-07-05 1:34 UTC (permalink / raw)
To: shuang.he, lei.a.liu, intel-gfx, arun.siluvery
Tested-By: Intel Graphics QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 6719
-------------------------------------Summary-------------------------------------
Platform Delta drm-intel-nightly Series Applied
ILK 302/302 302/302
SNB 312/316 312/316
IVB 343/343 343/343
BYT 287/287 287/287
HSW 380/380 380/380
-------------------------------------Detailed-------------------------------------
Platform Test drm-intel-nightly Series Applied
Note: You need to pay more attention to line start with '*'
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2015-07-10 15:24 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-07-03 13:27 [PATCH] drm/i915: Update WaFlushCoherentL3CacheLinesAtContextSwitch Arun Siluvery
2015-07-03 15:42 ` Chris Wilson
2015-07-06 11:52 ` Dave Gordon
2015-07-06 12:24 ` Siluvery, Arun
2015-07-06 12:38 ` Daniel Vetter
2015-07-06 13:16 ` Dave Gordon
2015-07-06 14:33 ` Daniel Vetter
2015-07-06 15:25 ` Dave Gordon
2015-07-06 15:41 ` Chris Wilson
2015-07-10 15:24 ` Dave Gordon
2015-07-05 1:34 ` shuang.he
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox