public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/i915: Use readl/writel for ring buffer access
@ 2016-04-14 10:59 Tvrtko Ursulin
  2016-04-14 10:59 ` [PATCH 2/3] drm/i915: Use writel instead of iowrite32 when doing GTT relocations Tvrtko Ursulin
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Tvrtko Ursulin @ 2016-04-14 10:59 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

We know ringbuffers are memory and not ports so if we use readl
and writel instead of ioread32 and iowrite32 (which dispatch to
the very same functions after checking the address range) we
avoid generating functions calls and branching on every access.

This generates smaller code and potentialy also improves
performance. Brief testing with gem_latency (ten runs of both
-n 0 and -n 100) show potential 3% better throughput and 1%
better latency although more runs would be required to be
absolutely certain.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c         | 8 ++++----
 drivers/gpu/drm/i915/intel_lrc.c        | 2 +-
 drivers/gpu/drm/i915/intel_lrc.h        | 2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 247d962afabb..d9c003225d8e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2907,7 +2907,7 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno)
 		head &= engine->buffer->size - 1;
 
 		/* This here seems to blow up */
-		cmd = ioread32(engine->buffer->virtual_start + head);
+		cmd = readl(engine->buffer->virtual_start + head);
 		if (cmd == ipehr)
 			break;
 
@@ -2917,11 +2917,11 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno)
 	if (!i)
 		return NULL;
 
-	*seqno = ioread32(engine->buffer->virtual_start + head + 4) + 1;
+	*seqno = readl(engine->buffer->virtual_start + head + 4) + 1;
 	if (INTEL_INFO(engine->dev)->gen >= 8) {
-		offset = ioread32(engine->buffer->virtual_start + head + 12);
+		offset = readl(engine->buffer->virtual_start + head + 12);
 		offset <<= 32;
-		offset = ioread32(engine->buffer->virtual_start + head + 8);
+		offset = readl(engine->buffer->virtual_start + head + 8);
 	}
 	return semaphore_wait_to_signaller_ring(engine, ipehr, offset);
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5e08ea5aa6d1..3c2dd448b446 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -822,7 +822,7 @@ static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
 	virt = ringbuf->virtual_start + ringbuf->tail;
 	rem /= 4;
 	while (rem--)
-		iowrite32(MI_NOOP, virt++);
+		writel(MI_NOOP, virt++);
 
 	ringbuf->tail = 0;
 	intel_ring_update_space(ringbuf);
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 9affda2c650c..eeec4bc19bac 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -82,7 +82,7 @@ static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf)
 static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf,
 					   u32 data)
 {
-	iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
+	writel(data, ringbuf->virtual_start + ringbuf->tail);
 	ringbuf->tail += 4;
 }
 static inline void intel_logical_ring_emit_reg(struct intel_ringbuffer *ringbuf,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 19ebe7796e7f..d336a9de8a09 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2344,7 +2344,7 @@ static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
 	virt = ringbuf->virtual_start + ringbuf->tail;
 	rem /= 4;
 	while (rem--)
-		iowrite32(MI_NOOP, virt++);
+		writel(MI_NOOP, virt++);
 
 	ringbuf->tail = 0;
 	intel_ring_update_space(ringbuf);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2ade194bbea9..bd821e443ac9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -444,7 +444,7 @@ static inline void intel_ring_emit(struct intel_engine_cs *engine,
 				   u32 data)
 {
 	struct intel_ringbuffer *ringbuf = engine->buffer;
-	iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
+	writel(data, ringbuf->virtual_start + ringbuf->tail);
 	ringbuf->tail += 4;
 }
 static inline void intel_ring_emit_reg(struct intel_engine_cs *engine,
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/3] drm/i915: Use writel instead of iowrite32 when doing GTT relocations
  2016-04-14 10:59 [PATCH 1/3] drm/i915: Use readl/writel for ring buffer access Tvrtko Ursulin
@ 2016-04-14 10:59 ` Tvrtko Ursulin
  2016-04-14 11:17   ` Chris Wilson
  2016-04-14 10:59 ` [PATCH 3/3] drm/i915: Use writel instead of iowrite32 when programming page table entries Tvrtko Ursulin
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 12+ messages in thread
From: Tvrtko Ursulin @ 2016-04-14 10:59 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

We know that our aperture is memory and not ports so we can use
the correct accessors straight away and avoid function calls.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 6ee4f00f620c..77b04a76b8d5 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -333,7 +333,7 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
 	offset += reloc->offset;
 	reloc_page = io_mapping_map_atomic_wc(ggtt->mappable,
 					      offset & PAGE_MASK);
-	iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
+	writel(lower_32_bits(delta), reloc_page + offset_in_page(offset));
 
 	if (INTEL_INFO(dev)->gen >= 8) {
 		offset += sizeof(uint32_t);
@@ -345,8 +345,8 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
 							 offset);
 		}
 
-		iowrite32(upper_32_bits(delta),
-			  reloc_page + offset_in_page(offset));
+		writel(upper_32_bits(delta),
+		       reloc_page + offset_in_page(offset));
 	}
 
 	io_mapping_unmap_atomic(reloc_page);
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/3] drm/i915: Use writel instead of iowrite32 when programming page table entries
  2016-04-14 10:59 [PATCH 1/3] drm/i915: Use readl/writel for ring buffer access Tvrtko Ursulin
  2016-04-14 10:59 ` [PATCH 2/3] drm/i915: Use writel instead of iowrite32 when doing GTT relocations Tvrtko Ursulin
@ 2016-04-14 10:59 ` Tvrtko Ursulin
  2016-04-14 11:16 ` [PATCH 1/3] drm/i915: Use readl/writel for ring buffer access Chris Wilson
  2016-04-14 16:37 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] " Patchwork
  3 siblings, 0 replies; 12+ messages in thread
From: Tvrtko Ursulin @ 2016-04-14 10:59 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

We know that the underlying area is memory and not ports so we
can use the correct accessor straight away to save on function
calls.

Especially since the gen8_set_pte already mixes writeq and
iowrite32 depending on the kernel config.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c5cb04907525..6f506ab714c5 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2348,8 +2348,8 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 #ifdef writeq
 	writeq(pte, addr);
 #else
-	iowrite32((u32)pte, addr);
-	iowrite32(pte >> 32, addr + 4);
+	writel((u32)pte, addr);
+	writel(pte >> 32, addr + 4);
 #endif
 }
 
@@ -2450,7 +2450,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 
 	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
 		addr = sg_page_iter_dma_address(&sg_iter);
-		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
+		writel(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
 		i++;
 	}
 
@@ -2533,7 +2533,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 				     I915_CACHE_LLC, use_scratch, 0);
 
 	for (i = 0; i < num_entries; i++)
-		iowrite32(scratch_pte, &gtt_base[i]);
+		writel(scratch_pte, &gtt_base[i]);
 	readl(gtt_base);
 
 	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] drm/i915: Use readl/writel for ring buffer access
  2016-04-14 10:59 [PATCH 1/3] drm/i915: Use readl/writel for ring buffer access Tvrtko Ursulin
  2016-04-14 10:59 ` [PATCH 2/3] drm/i915: Use writel instead of iowrite32 when doing GTT relocations Tvrtko Ursulin
  2016-04-14 10:59 ` [PATCH 3/3] drm/i915: Use writel instead of iowrite32 when programming page table entries Tvrtko Ursulin
@ 2016-04-14 11:16 ` Chris Wilson
  2016-04-14 11:24   ` Tvrtko Ursulin
  2016-04-14 16:37 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] " Patchwork
  3 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2016-04-14 11:16 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Thu, Apr 14, 2016 at 11:59:29AM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> We know ringbuffers are memory and not ports so if we use readl
> and writel instead of ioread32 and iowrite32 (which dispatch to
> the very same functions after checking the address range) we
> avoid generating functions calls and branching on every access.

We don't need to use readl/write at all, since they are normal memory
on llc, and on x86 we can pretend that iomaps (!llc/stolen) are as well.

This patch is in the queue along with killing the incorrect spare iomem
annotation.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/3] drm/i915: Use writel instead of iowrite32 when doing GTT relocations
  2016-04-14 10:59 ` [PATCH 2/3] drm/i915: Use writel instead of iowrite32 when doing GTT relocations Tvrtko Ursulin
@ 2016-04-14 11:17   ` Chris Wilson
  0 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2016-04-14 11:17 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Thu, Apr 14, 2016 at 11:59:30AM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> We know that our aperture is memory and not ports so we can use
> the correct accessors straight away and avoid function calls.

There are much more complete patches to fix a host of issues here on the
list.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] drm/i915: Use readl/writel for ring buffer access
  2016-04-14 11:16 ` [PATCH 1/3] drm/i915: Use readl/writel for ring buffer access Chris Wilson
@ 2016-04-14 11:24   ` Tvrtko Ursulin
  2016-04-14 11:30     ` Chris Wilson
  0 siblings, 1 reply; 12+ messages in thread
From: Tvrtko Ursulin @ 2016-04-14 11:24 UTC (permalink / raw)
  To: Chris Wilson, Intel-gfx


On 14/04/16 12:16, Chris Wilson wrote:
> On Thu, Apr 14, 2016 at 11:59:29AM +0100, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> We know ringbuffers are memory and not ports so if we use readl
>> and writel instead of ioread32 and iowrite32 (which dispatch to
>> the very same functions after checking the address range) we
>> avoid generating functions calls and branching on every access.
>
> We don't need to use readl/write at all, since they are normal memory
> on llc, and on x86 we can pretend that iomaps (!llc/stolen) are as well.

It is fine to use readl/writel since it translates to a single mov 
instruction anyway on x86.

> This patch is in the queue along with killing the incorrect spare iomem
> annotation.

Ok did not spot them. Don't mind either way, thought this is quick, easy 
and obvious improvement when I spotted the ugly code generated for ring 
buffer writing.

Mind you it is still not completely pretty with this patch since it is 
full of reloads and adds for ringbuf->virtual_start and tail which I 
can't figure how to help GCC optimize. Unless we make being, emit and 
advance functions return the current tail pointer and also accept it. In 
that case it all shrinks by half.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] drm/i915: Use readl/writel for ring buffer access
  2016-04-14 11:24   ` Tvrtko Ursulin
@ 2016-04-14 11:30     ` Chris Wilson
  2016-04-14 11:58       ` Tvrtko Ursulin
  0 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2016-04-14 11:30 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Thu, Apr 14, 2016 at 12:24:20PM +0100, Tvrtko Ursulin wrote:
> 
> On 14/04/16 12:16, Chris Wilson wrote:
> >On Thu, Apr 14, 2016 at 11:59:29AM +0100, Tvrtko Ursulin wrote:
> >>From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >>We know ringbuffers are memory and not ports so if we use readl
> >>and writel instead of ioread32 and iowrite32 (which dispatch to
> >>the very same functions after checking the address range) we
> >>avoid generating functions calls and branching on every access.
> >
> >We don't need to use readl/write at all, since they are normal memory
> >on llc, and on x86 we can pretend that iomaps (!llc/stolen) are as well.
> 
> It is fine to use readl/writel since it translates to a single mov
> instruction anyway on x86.
> 
> >This patch is in the queue along with killing the incorrect spare iomem
> >annotation.
> 
> Ok did not spot them. Don't mind either way, thought this is quick,
> easy and obvious improvement when I spotted the ugly code generated
> for ring buffer writing.
> 
> Mind you it is still not completely pretty with this patch since it
> is full of reloads and adds for ringbuf->virtual_start and tail
> which I can't figure how to help GCC optimize. Unless we make being,
> emit and advance functions return the current tail pointer and also
> accept it. In that case it all shrinks by half.

We figured out how to help gcc with that in userspace using:

out = ring_begin(num_dwords);
out[0] = cmd;
out[N] = dwN

GCC will then do

mov $imm0, 0x0($eax)
mov $imm1, 0x4($eax)
mov $edx, 0x8($eax)
etc

Forgive the clumsy rebasing:

https://cgit.freedesktop.org/~ickle/linux-2.6/commit/?h=tasklet&id=a5c7b28441af0cb0e640f4ba86facba69e8f6c37
 drivers/gpu/drm/i915/i915_gem.c            |   4 -
 drivers/gpu/drm/i915/i915_gem_context.c    |  76 +--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  37 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c        |  61 +-
 drivers/gpu/drm/i915/i915_gem_request.c    | 121 +++-
 drivers/gpu/drm/i915/i915_gem_request.h    |   3 +
 drivers/gpu/drm/i915/i915_guc_submission.c |   2 +-
 drivers/gpu/drm/i915/intel_display.c       | 134 ++---
 drivers/gpu/drm/i915/intel_lrc.c           | 239 ++++----
 drivers/gpu/drm/i915/intel_mocs.c          |  50 +-
 drivers/gpu/drm/i915/intel_overlay.c       |  77 +--
 drivers/gpu/drm/i915/intel_ringbuffer.c    | 922 ++++++++++++-----------------
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  28 +-
 13 files changed, 793 insertions(+), 961 deletions(-)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] drm/i915: Use readl/writel for ring buffer access
  2016-04-14 11:30     ` Chris Wilson
@ 2016-04-14 11:58       ` Tvrtko Ursulin
  2016-04-14 15:07         ` Dave Gordon
  0 siblings, 1 reply; 12+ messages in thread
From: Tvrtko Ursulin @ 2016-04-14 11:58 UTC (permalink / raw)
  To: Chris Wilson, Intel-gfx


On 14/04/16 12:30, Chris Wilson wrote:
> On Thu, Apr 14, 2016 at 12:24:20PM +0100, Tvrtko Ursulin wrote:
>>
>> On 14/04/16 12:16, Chris Wilson wrote:
>>> On Thu, Apr 14, 2016 at 11:59:29AM +0100, Tvrtko Ursulin wrote:
>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>
>>>> We know ringbuffers are memory and not ports so if we use readl
>>>> and writel instead of ioread32 and iowrite32 (which dispatch to
>>>> the very same functions after checking the address range) we
>>>> avoid generating functions calls and branching on every access.
>>>
>>> We don't need to use readl/write at all, since they are normal memory
>>> on llc, and on x86 we can pretend that iomaps (!llc/stolen) are as well.
>>
>> It is fine to use readl/writel since it translates to a single mov
>> instruction anyway on x86.
>>
>>> This patch is in the queue along with killing the incorrect spare iomem
>>> annotation.
>>
>> Ok did not spot them. Don't mind either way, thought this is quick,
>> easy and obvious improvement when I spotted the ugly code generated
>> for ring buffer writing.
>>
>> Mind you it is still not completely pretty with this patch since it
>> is full of reloads and adds for ringbuf->virtual_start and tail
>> which I can't figure how to help GCC optimize. Unless we make being,
>> emit and advance functions return the current tail pointer and also
>> accept it. In that case it all shrinks by half.
>
> We figured out how to help gcc with that in userspace using:
>
> out = ring_begin(num_dwords);
> out[0] = cmd;
> out[N] = dwN
>
> GCC will then do
>
> mov $imm0, 0x0($eax)
> mov $imm1, 0x4($eax)
> mov $edx, 0x8($eax)
> etc

Would be nice, hope it happens soon. :)

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] drm/i915: Use readl/writel for ring buffer access
  2016-04-14 11:58       ` Tvrtko Ursulin
@ 2016-04-14 15:07         ` Dave Gordon
  2016-04-14 15:55           ` Chris Wilson
  0 siblings, 1 reply; 12+ messages in thread
From: Dave Gordon @ 2016-04-14 15:07 UTC (permalink / raw)
  To: Tvrtko Ursulin, Chris Wilson, Intel-gfx

On 14/04/16 12:58, Tvrtko Ursulin wrote:
>
> On 14/04/16 12:30, Chris Wilson wrote:
>> On Thu, Apr 14, 2016 at 12:24:20PM +0100, Tvrtko Ursulin wrote:
>>>
>>> On 14/04/16 12:16, Chris Wilson wrote:
>>>> On Thu, Apr 14, 2016 at 11:59:29AM +0100, Tvrtko Ursulin wrote:
>>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>
>>>>> We know ringbuffers are memory and not ports so if we use readl
>>>>> and writel instead of ioread32 and iowrite32 (which dispatch to
>>>>> the very same functions after checking the address range) we
>>>>> avoid generating functions calls and branching on every access.
>>>>
>>>> We don't need to use readl/write at all, since they are normal memory
>>>> on llc, and on x86 we can pretend that iomaps (!llc/stolen) are as
>>>> well.
>>>
>>> It is fine to use readl/writel since it translates to a single mov
>>> instruction anyway on x86.
>>>
>>>> This patch is in the queue along with killing the incorrect spare iomem
>>>> annotation.
>>>
>>> Ok did not spot them. Don't mind either way, thought this is quick,
>>> easy and obvious improvement when I spotted the ugly code generated
>>> for ring buffer writing.
>>>
>>> Mind you it is still not completely pretty with this patch since it
>>> is full of reloads and adds for ringbuf->virtual_start and tail
>>> which I can't figure how to help GCC optimize. Unless we make being,
>>> emit and advance functions return the current tail pointer and also
>>> accept it. In that case it all shrinks by half.
>>
>> We figured out how to help gcc with that in userspace using:
>>
>> out = ring_begin(num_dwords);
>> out[0] = cmd;
>> out[N] = dwN
>>
>> GCC will then do
>>
>> mov $imm0, 0x0($eax)
>> mov $imm1, 0x4($eax)
>> mov $edx, 0x8($eax)
>> etc
>
> Would be nice, hope it happens soon. :)
>
> Regards,
> Tvrtko

Another couple of alternative styles:

	DWORD* ptr = ring_begin(ring, nwords);
	*ptr++ = MI_WHATEVER;
	*ptr++ = param1;
	...
	ring_advance(ring, ptr);
	// this call checks that 'ptr' has not gone
	// beyond the nwords reserved above

Or collapse it all into one call:

	DWORD insns[OP_NWORDS] = {
		MI_WHATEVER,
		param1,
		...
	}
	ring_append(ring, nwords, insns);

which combines the check-and-wrap with a block copy to add all the 
instructions in one go :)

.Dave.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] drm/i915: Use readl/writel for ring buffer access
  2016-04-14 15:07         ` Dave Gordon
@ 2016-04-14 15:55           ` Chris Wilson
  0 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2016-04-14 15:55 UTC (permalink / raw)
  To: Dave Gordon; +Cc: Intel-gfx

On Thu, Apr 14, 2016 at 04:07:58PM +0100, Dave Gordon wrote:
> On 14/04/16 12:58, Tvrtko Ursulin wrote:
> >
> >On 14/04/16 12:30, Chris Wilson wrote:
> >>On Thu, Apr 14, 2016 at 12:24:20PM +0100, Tvrtko Ursulin wrote:
> >>>
> >>>On 14/04/16 12:16, Chris Wilson wrote:
> >>>>On Thu, Apr 14, 2016 at 11:59:29AM +0100, Tvrtko Ursulin wrote:
> >>>>>From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>>>>
> >>>>>We know ringbuffers are memory and not ports so if we use readl
> >>>>>and writel instead of ioread32 and iowrite32 (which dispatch to
> >>>>>the very same functions after checking the address range) we
> >>>>>avoid generating functions calls and branching on every access.
> >>>>
> >>>>We don't need to use readl/write at all, since they are normal memory
> >>>>on llc, and on x86 we can pretend that iomaps (!llc/stolen) are as
> >>>>well.
> >>>
> >>>It is fine to use readl/writel since it translates to a single mov
> >>>instruction anyway on x86.
> >>>
> >>>>This patch is in the queue along with killing the incorrect spare iomem
> >>>>annotation.
> >>>
> >>>Ok did not spot them. Don't mind either way, thought this is quick,
> >>>easy and obvious improvement when I spotted the ugly code generated
> >>>for ring buffer writing.
> >>>
> >>>Mind you it is still not completely pretty with this patch since it
> >>>is full of reloads and adds for ringbuf->virtual_start and tail
> >>>which I can't figure how to help GCC optimize. Unless we make being,
> >>>emit and advance functions return the current tail pointer and also
> >>>accept it. In that case it all shrinks by half.
> >>
> >>We figured out how to help gcc with that in userspace using:
> >>
> >>out = ring_begin(num_dwords);
> >>out[0] = cmd;
> >>out[N] = dwN
> >>
> >>GCC will then do
> >>
> >>mov $imm0, 0x0($eax)
> >>mov $imm1, 0x4($eax)
> >>mov $edx, 0x8($eax)
> >>etc
> >
> >Would be nice, hope it happens soon. :)
> >
> >Regards,
> >Tvrtko
> 
> Another couple of alternative styles:
> 
> 	DWORD* ptr = ring_begin(ring, nwords);
> 	*ptr++ = MI_WHATEVER;
> 	*ptr++ = param1;

GCC turns *ptr++ into mov $val, (%eax); add $4, %eax

It's convenient for translating the for loops, but not as compact.

> 	...
> 	ring_advance(ring, ptr);
> 	// this call checks that 'ptr' has not gone
> 	// beyond the nwords reserved above
> 
> Or collapse it all into one call:
> 
> 	DWORD insns[OP_NWORDS] = {
> 		MI_WHATEVER,
> 		param1,
> 		...
> 	}
> 	ring_append(ring, nwords, insns);
> 
> which combines the check-and-wrap with a block copy to add all the
> instructions in one go :)

Considered that, but not seriously looked into what gcc does there -
mainly because that would involve even more complicated changes to the
code.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Use readl/writel for ring buffer access
  2016-04-14 10:59 [PATCH 1/3] drm/i915: Use readl/writel for ring buffer access Tvrtko Ursulin
                   ` (2 preceding siblings ...)
  2016-04-14 11:16 ` [PATCH 1/3] drm/i915: Use readl/writel for ring buffer access Chris Wilson
@ 2016-04-14 16:37 ` Patchwork
  2016-04-15  8:54   ` Tvrtko Ursulin
  3 siblings, 1 reply; 12+ messages in thread
From: Patchwork @ 2016-04-14 16:37 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: Use readl/writel for ring buffer access
URL   : https://patchwork.freedesktop.org/series/5714/
State : failure

== Summary ==

Series 5714v1 Series without cover letter
http://patchwork.freedesktop.org/api/1.0/series/5714/revisions/1/mbox/

Test drv_hangman:
        Subgroup error-state-basic:
                pass       -> FAIL       (ilk-hp8440p)
Test gem_busy:
        Subgroup basic-render:
                pass       -> SKIP       (ilk-hp8440p)
Test gem_exec_basic:
        Subgroup basic-render:
                pass       -> FAIL       (ilk-hp8440p)
        Subgroup gtt-bsd:
                pass       -> FAIL       (ilk-hp8440p)
        Subgroup readonly-default:
                pass       -> FAIL       (ilk-hp8440p)
        Subgroup readonly-render:
                pass       -> FAIL       (ilk-hp8440p)
Test gem_exec_create:
        Subgroup basic:
                pass       -> FAIL       (ilk-hp8440p)
Test gem_exec_store:
        Subgroup basic-all:
                pass       -> FAIL       (ilk-hp8440p)
        Subgroup basic-bsd:
                pass       -> FAIL       (ilk-hp8440p)
        Subgroup basic-default:
                pass       -> FAIL       (ilk-hp8440p)
Test gem_exec_whisper:
        Subgroup basic:
                pass       -> SKIP       (ilk-hp8440p)
Test gem_linear_blits:
        Subgroup basic:
                pass       -> FAIL       (ilk-hp8440p)
Test gem_mmap_gtt:
        Subgroup basic-small-copy-xy:
                pass       -> DMESG-WARN (ilk-hp8440p)
Test gem_ringfill:
        Subgroup basic-default-hang:
                pass       -> DMESG-FAIL (ilk-hp8440p)
Test gem_sync:
        Subgroup basic-all:
                pass       -> FAIL       (ilk-hp8440p)
        Subgroup basic-bsd:
                pass       -> FAIL       (ilk-hp8440p)
        Subgroup basic-render:
                pass       -> DMESG-FAIL (ilk-hp8440p)
Test gem_tiled_fence_blits:
        Subgroup basic:
                pass       -> FAIL       (ilk-hp8440p)
Test kms_addfb_basic:
        Subgroup addfb25-modifier-no-flag:
                pass       -> DMESG-WARN (ilk-hp8440p)
Test kms_flip:
        Subgroup basic-flip-vs-dpms:
                pass       -> DMESG-FAIL (ilk-hp8440p) UNSTABLE
        Subgroup basic-flip-vs-wf_vblank:
                pass       -> FAIL       (ilk-hp8440p) UNSTABLE
        Subgroup basic-plain-flip:
                pass       -> DMESG-WARN (ilk-hp8440p)
Test kms_force_connector_basic:
        Subgroup force-edid:
                skip       -> PASS       (hsw-gt2)
        Subgroup force-load-detect:
                skip       -> DMESG-WARN (ilk-hp8440p)
Test kms_pipe_crc_basic:
        Subgroup bad-nb-words-1:
                pass       -> DMESG-WARN (ilk-hp8440p)
        Subgroup bad-nb-words-3:
                pass       -> DMESG-WARN (ilk-hp8440p)
        Subgroup bad-pipe:
                pass       -> DMESG-WARN (ilk-hp8440p)
        Subgroup hang-read-crc-pipe-a:
                pass       -> DMESG-FAIL (ilk-hp8440p)
        Subgroup nonblocking-crc-pipe-b:
                pass       -> DMESG-FAIL (ilk-hp8440p)
        Subgroup read-crc-pipe-a-frame-sequence:
                pass       -> DMESG-FAIL (ilk-hp8440p)
        Subgroup read-crc-pipe-b:
                pass       -> DMESG-FAIL (ilk-hp8440p)
        Subgroup read-crc-pipe-b-frame-sequence:
                pass       -> DMESG-FAIL (ilk-hp8440p)
        Subgroup suspend-read-crc-pipe-c:
                skip       -> DMESG-FAIL (ilk-hp8440p)
Test kms_sink_crc_basic:
                skip       -> DMESG-FAIL (ilk-hp8440p)

bdw-ultra        total:203  pass:179  dwarn:0   dfail:0   fail:1   skip:23 
bsw-nuc-2        total:202  pass:162  dwarn:0   dfail:0   fail:1   skip:39 
byt-nuc          total:202  pass:164  dwarn:0   dfail:0   fail:0   skip:38 
hsw-brixbox      total:203  pass:178  dwarn:0   dfail:0   fail:1   skip:24 
hsw-gt2          total:203  pass:183  dwarn:0   dfail:0   fail:1   skip:19 
ilk-hp8440p      total:203  pass:103  dwarn:7   dfail:10  fail:15  skip:68 
ivb-t430s        total:203  pass:174  dwarn:0   dfail:0   fail:1   skip:28 
skl-i7k-2        total:203  pass:177  dwarn:0   dfail:0   fail:1   skip:25 
skl-nuci5        total:203  pass:191  dwarn:0   dfail:0   fail:1   skip:11 
snb-dellxps      total:203  pass:164  dwarn:0   dfail:0   fail:1   skip:38 
snb-x220t        total:203  pass:164  dwarn:0   dfail:0   fail:2   skip:37 
BOOT FAILED for bdw-nuci7

Results at /archive/results/CI_IGT_test/Patchwork_1901/

c7583aec08ba04e2336bd9879a10f30d4e0cdc60 drm-intel-nightly: 2016y-04m-14d-14h-53m-34s UTC integration manifest
b704cc8 drm/i915: Use writel instead of iowrite32 when programming page table entries
6a748d4 drm/i915: Use writel instead of iowrite32 when doing GTT relocations
8c397f2 drm/i915: Use readl/writel for ring buffer access

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Use readl/writel for ring buffer access
  2016-04-14 16:37 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] " Patchwork
@ 2016-04-15  8:54   ` Tvrtko Ursulin
  0 siblings, 0 replies; 12+ messages in thread
From: Tvrtko Ursulin @ 2016-04-15  8:54 UTC (permalink / raw)
  To: intel-gfx


On 14/04/16 17:37, Patchwork wrote:
> == Series Details ==
>
> Series: series starting with [1/3] drm/i915: Use readl/writel for ring buffer access
> URL   : https://patchwork.freedesktop.org/series/5714/
> State : failure
>
> == Summary ==
>
> Series 5714v1 Series without cover letter
> http://patchwork.freedesktop.org/api/1.0/series/5714/revisions/1/mbox/
>
> Test drv_hangman:
>          Subgroup error-state-basic:
>                  pass       -> FAIL       (ilk-hp8440p)
> Test gem_busy:
>          Subgroup basic-render:
>                  pass       -> SKIP       (ilk-hp8440p)
> Test gem_exec_basic:
>          Subgroup basic-render:
>                  pass       -> FAIL       (ilk-hp8440p)
>          Subgroup gtt-bsd:
>                  pass       -> FAIL       (ilk-hp8440p)
>          Subgroup readonly-default:
>                  pass       -> FAIL       (ilk-hp8440p)
>          Subgroup readonly-render:
>                  pass       -> FAIL       (ilk-hp8440p)
> Test gem_exec_create:
>          Subgroup basic:
>                  pass       -> FAIL       (ilk-hp8440p)
> Test gem_exec_store:
>          Subgroup basic-all:
>                  pass       -> FAIL       (ilk-hp8440p)
>          Subgroup basic-bsd:
>                  pass       -> FAIL       (ilk-hp8440p)
>          Subgroup basic-default:
>                  pass       -> FAIL       (ilk-hp8440p)
> Test gem_exec_whisper:
>          Subgroup basic:
>                  pass       -> SKIP       (ilk-hp8440p)
> Test gem_linear_blits:
>          Subgroup basic:
>                  pass       -> FAIL       (ilk-hp8440p)
> Test gem_mmap_gtt:
>          Subgroup basic-small-copy-xy:
>                  pass       -> DMESG-WARN (ilk-hp8440p)
> Test gem_ringfill:
>          Subgroup basic-default-hang:
>                  pass       -> DMESG-FAIL (ilk-hp8440p)
> Test gem_sync:
>          Subgroup basic-all:
>                  pass       -> FAIL       (ilk-hp8440p)
>          Subgroup basic-bsd:
>                  pass       -> FAIL       (ilk-hp8440p)
>          Subgroup basic-render:
>                  pass       -> DMESG-FAIL (ilk-hp8440p)
> Test gem_tiled_fence_blits:
>          Subgroup basic:
>                  pass       -> FAIL       (ilk-hp8440p)
> Test kms_addfb_basic:
>          Subgroup addfb25-modifier-no-flag:
>                  pass       -> DMESG-WARN (ilk-hp8440p)
> Test kms_flip:
>          Subgroup basic-flip-vs-dpms:
>                  pass       -> DMESG-FAIL (ilk-hp8440p) UNSTABLE
>          Subgroup basic-flip-vs-wf_vblank:
>                  pass       -> FAIL       (ilk-hp8440p) UNSTABLE
>          Subgroup basic-plain-flip:
>                  pass       -> DMESG-WARN (ilk-hp8440p)
> Test kms_force_connector_basic:
>          Subgroup force-edid:
>                  skip       -> PASS       (hsw-gt2)
>          Subgroup force-load-detect:
>                  skip       -> DMESG-WARN (ilk-hp8440p)
> Test kms_pipe_crc_basic:
>          Subgroup bad-nb-words-1:
>                  pass       -> DMESG-WARN (ilk-hp8440p)
>          Subgroup bad-nb-words-3:
>                  pass       -> DMESG-WARN (ilk-hp8440p)
>          Subgroup bad-pipe:
>                  pass       -> DMESG-WARN (ilk-hp8440p)
>          Subgroup hang-read-crc-pipe-a:
>                  pass       -> DMESG-FAIL (ilk-hp8440p)
>          Subgroup nonblocking-crc-pipe-b:
>                  pass       -> DMESG-FAIL (ilk-hp8440p)
>          Subgroup read-crc-pipe-a-frame-sequence:
>                  pass       -> DMESG-FAIL (ilk-hp8440p)
>          Subgroup read-crc-pipe-b:
>                  pass       -> DMESG-FAIL (ilk-hp8440p)
>          Subgroup read-crc-pipe-b-frame-sequence:
>                  pass       -> DMESG-FAIL (ilk-hp8440p)
>          Subgroup suspend-read-crc-pipe-c:
>                  skip       -> DMESG-FAIL (ilk-hp8440p)
> Test kms_sink_crc_basic:
>                  skip       -> DMESG-FAIL (ilk-hp8440p)
>
> bdw-ultra        total:203  pass:179  dwarn:0   dfail:0   fail:1   skip:23
> bsw-nuc-2        total:202  pass:162  dwarn:0   dfail:0   fail:1   skip:39
> byt-nuc          total:202  pass:164  dwarn:0   dfail:0   fail:0   skip:38
> hsw-brixbox      total:203  pass:178  dwarn:0   dfail:0   fail:1   skip:24
> hsw-gt2          total:203  pass:183  dwarn:0   dfail:0   fail:1   skip:19
> ilk-hp8440p      total:203  pass:103  dwarn:7   dfail:10  fail:15  skip:68
> ivb-t430s        total:203  pass:174  dwarn:0   dfail:0   fail:1   skip:28
> skl-i7k-2        total:203  pass:177  dwarn:0   dfail:0   fail:1   skip:25
> skl-nuci5        total:203  pass:191  dwarn:0   dfail:0   fail:1   skip:11
> snb-dellxps      total:203  pass:164  dwarn:0   dfail:0   fail:1   skip:38
> snb-x220t        total:203  pass:164  dwarn:0   dfail:0   fail:2   skip:37
> BOOT FAILED for bdw-nuci7
>
> Results at /archive/results/CI_IGT_test/Patchwork_1901/
>
> c7583aec08ba04e2336bd9879a10f30d4e0cdc60 drm-intel-nightly: 2016y-04m-14d-14h-53m-34s UTC integration manifest
> b704cc8 drm/i915: Use writel instead of iowrite32 when programming page table entries
> 6a748d4 drm/i915: Use writel instead of iowrite32 when doing GTT relocations
> 8c397f2 drm/i915: Use readl/writel for ring buffer access

Oh this breaks ILK - what it special about it?

Regards,

Tvrtko


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2016-04-15  8:54 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-04-14 10:59 [PATCH 1/3] drm/i915: Use readl/writel for ring buffer access Tvrtko Ursulin
2016-04-14 10:59 ` [PATCH 2/3] drm/i915: Use writel instead of iowrite32 when doing GTT relocations Tvrtko Ursulin
2016-04-14 11:17   ` Chris Wilson
2016-04-14 10:59 ` [PATCH 3/3] drm/i915: Use writel instead of iowrite32 when programming page table entries Tvrtko Ursulin
2016-04-14 11:16 ` [PATCH 1/3] drm/i915: Use readl/writel for ring buffer access Chris Wilson
2016-04-14 11:24   ` Tvrtko Ursulin
2016-04-14 11:30     ` Chris Wilson
2016-04-14 11:58       ` Tvrtko Ursulin
2016-04-14 15:07         ` Dave Gordon
2016-04-14 15:55           ` Chris Wilson
2016-04-14 16:37 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] " Patchwork
2016-04-15  8:54   ` Tvrtko Ursulin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox