* [PATCH 1/2] drm/i915: Embed the ring->private within the struct intel_ring_buffer
@ 2013-08-26 19:58 Chris Wilson
2013-08-26 19:58 ` [PATCH 2/2] drm/i915: Use RCS flips on Ivybridge+ Chris Wilson
0 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2013-08-26 19:58 UTC (permalink / raw)
To: intel-gfx
We now have more devices using ring->private than not, and they all want
the same structure. Worse, I would like to use a scratch page from
outside of intel_ringbuffer.c and so for convenience would like to reuse
ring->private. Embed the object into the struct intel_ring_buffer so that
we can keep the code clean.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gpu_error.c | 2 +-
drivers/gpu/drm/i915/intel_ringbuffer.c | 99 ++++++++++-----------------------
drivers/gpu/drm/i915/intel_ringbuffer.h | 6 +-
3 files changed, 35 insertions(+), 72 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index e8955e7..3b003af 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -644,7 +644,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
if (WARN_ON(ring->id != RCS))
return NULL;
- obj = ring->private;
+ obj = ring->scratch.obj;
if (acthd >= i915_gem_obj_ggtt_offset(obj) &&
acthd < i915_gem_obj_ggtt_offset(obj) + obj->base.size)
return i915_error_object_create(dev_priv, obj);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b0fb5ce..7fa52bd 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -33,16 +33,6 @@
#include "i915_trace.h"
#include "intel_drv.h"
-/*
- * 965+ support PIPE_CONTROL commands, which provide finer grained control
- * over cache flushing.
- */
-struct pipe_control {
- struct drm_i915_gem_object *obj;
- volatile u32 *cpu_page;
- u32 gtt_offset;
-};
-
static inline int ring_space(struct intel_ring_buffer *ring)
{
int space = (ring->head & HEAD_ADDR) - (ring->tail + I915_RING_FREE_SPACE);
@@ -185,8 +175,7 @@ gen4_render_ring_flush(struct intel_ring_buffer *ring,
static int
intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
{
- struct pipe_control *pc = ring->private;
- u32 scratch_addr = pc->gtt_offset + 128;
+ u32 scratch_addr = ring->scratch.gtt_offset + 128;
int ret;
@@ -223,8 +212,7 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
u32 invalidate_domains, u32 flush_domains)
{
u32 flags = 0;
- struct pipe_control *pc = ring->private;
- u32 scratch_addr = pc->gtt_offset + 128;
+ u32 scratch_addr = ring->scratch.gtt_offset + 128;
int ret;
/* Force SNB workarounds for PIPE_CONTROL flushes */
@@ -316,8 +304,7 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring,
u32 invalidate_domains, u32 flush_domains)
{
u32 flags = 0;
- struct pipe_control *pc = ring->private;
- u32 scratch_addr = pc->gtt_offset + 128;
+ u32 scratch_addr = ring->scratch.gtt_offset + 128;
int ret;
/*
@@ -491,68 +478,43 @@ out:
static int
init_pipe_control(struct intel_ring_buffer *ring)
{
- struct pipe_control *pc;
- struct drm_i915_gem_object *obj;
int ret;
- if (ring->private)
+ if (ring->scratch.obj)
return 0;
- pc = kmalloc(sizeof(*pc), GFP_KERNEL);
- if (!pc)
- return -ENOMEM;
-
- obj = i915_gem_alloc_object(ring->dev, 4096);
- if (obj == NULL) {
+ ring->scratch.obj = i915_gem_alloc_object(ring->dev, 4096);
+ if (ring->scratch.obj == NULL) {
DRM_ERROR("Failed to allocate seqno page\n");
ret = -ENOMEM;
goto err;
}
- i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
+ i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
- ret = i915_gem_obj_ggtt_pin(obj, 4096, true, false);
+ ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, true, false);
if (ret)
goto err_unref;
- pc->gtt_offset = i915_gem_obj_ggtt_offset(obj);
- pc->cpu_page = kmap(sg_page(obj->pages->sgl));
- if (pc->cpu_page == NULL) {
+ ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(ring->scratch.obj);
+ ring->scratch.cpu_page = kmap(sg_page(ring->scratch.obj->pages->sgl));
+ if (ring->scratch.cpu_page == NULL) {
ret = -ENOMEM;
goto err_unpin;
}
DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
- ring->name, pc->gtt_offset);
-
- pc->obj = obj;
- ring->private = pc;
+ ring->name, ring->scratch.gtt_offset);
return 0;
err_unpin:
- i915_gem_object_unpin(obj);
+ i915_gem_object_unpin(ring->scratch.obj);
err_unref:
- drm_gem_object_unreference(&obj->base);
+ drm_gem_object_unreference(&ring->scratch.obj->base);
err:
- kfree(pc);
return ret;
}
-static void
-cleanup_pipe_control(struct intel_ring_buffer *ring)
-{
- struct pipe_control *pc = ring->private;
- struct drm_i915_gem_object *obj;
-
- obj = pc->obj;
-
- kunmap(sg_page(obj->pages->sgl));
- i915_gem_object_unpin(obj);
- drm_gem_object_unreference(&obj->base);
-
- kfree(pc);
-}
-
static int init_render_ring(struct intel_ring_buffer *ring)
{
struct drm_device *dev = ring->dev;
@@ -617,16 +579,16 @@ static void render_ring_cleanup(struct intel_ring_buffer *ring)
{
struct drm_device *dev = ring->dev;
- if (!ring->private)
+ if (ring->scratch.obj == NULL)
return;
- if (HAS_BROKEN_CS_TLB(dev))
- drm_gem_object_unreference(to_gem_object(ring->private));
-
- if (INTEL_INFO(dev)->gen >= 5)
- cleanup_pipe_control(ring);
+ if (INTEL_INFO(dev)->gen >= 5) {
+ kunmap(sg_page(ring->scratch.obj->pages->sgl));
+ i915_gem_object_unpin(ring->scratch.obj);
+ }
- ring->private = NULL;
+ drm_gem_object_unreference(&ring->scratch.obj->base);
+ ring->scratch.obj = NULL;
}
static void
@@ -752,8 +714,7 @@ do { \
static int
pc_render_add_request(struct intel_ring_buffer *ring)
{
- struct pipe_control *pc = ring->private;
- u32 scratch_addr = pc->gtt_offset + 128;
+ u32 scratch_addr = ring->scratch.gtt_offset + 128;
int ret;
/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
@@ -771,7 +732,7 @@ pc_render_add_request(struct intel_ring_buffer *ring)
intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_WRITE_FLUSH |
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
- intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
+ intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
intel_ring_emit(ring, ring->outstanding_lazy_request);
intel_ring_emit(ring, 0);
PIPE_CONTROL_FLUSH(ring, scratch_addr);
@@ -790,7 +751,7 @@ pc_render_add_request(struct intel_ring_buffer *ring)
PIPE_CONTROL_WRITE_FLUSH |
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_NOTIFY);
- intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
+ intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
intel_ring_emit(ring, ring->outstanding_lazy_request);
intel_ring_emit(ring, 0);
__intel_ring_advance(ring);
@@ -824,15 +785,13 @@ ring_set_seqno(struct intel_ring_buffer *ring, u32 seqno)
static u32
pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
- struct pipe_control *pc = ring->private;
- return pc->cpu_page[0];
+ return ring->scratch.cpu_page[0];
}
static void
pc_render_set_seqno(struct intel_ring_buffer *ring, u32 seqno)
{
- struct pipe_control *pc = ring->private;
- pc->cpu_page[0] = seqno;
+ ring->scratch.cpu_page[0] = seqno;
}
static bool
@@ -1151,8 +1110,7 @@ i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
} else {
- struct drm_i915_gem_object *obj = ring->private;
- u32 cs_offset = i915_gem_obj_ggtt_offset(obj);
+ u32 cs_offset = ring->scratch.gtt_offset;
if (len > I830_BATCH_LIMIT)
return -ENOSPC;
@@ -1835,7 +1793,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
return ret;
}
- ring->private = obj;
+ ring->scratch.obj = obj;
+ ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
}
return intel_init_ring_buffer(dev, ring);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index e9e6a52..92c8574 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -155,7 +155,11 @@ struct intel_ring_buffer {
struct intel_ring_hangcheck hangcheck;
- void *private;
+ struct {
+ struct drm_i915_gem_object *obj;
+ u32 gtt_offset;
+ volatile u32 *cpu_page;
+ } scratch;
};
static inline bool
--
1.8.4.rc3
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 2/2] drm/i915: Use RCS flips on Ivybridge+
2013-08-26 19:58 [PATCH 1/2] drm/i915: Embed the ring->private within the struct intel_ring_buffer Chris Wilson
@ 2013-08-26 19:58 ` Chris Wilson
2013-08-26 20:42 ` Stéphane Marchesin
2013-08-29 19:07 ` Daniel Vetter
0 siblings, 2 replies; 12+ messages in thread
From: Chris Wilson @ 2013-08-26 19:58 UTC (permalink / raw)
To: intel-gfx; +Cc: Ben Widawsky, Stephane Marchesin
RCS flips do work on Ivybridge+ so long as we can unmask the messages
through DERRMR. However, there are quite a few workarounds mentioned
regarding unmasking more than one event or triggering more than one
message through DERRMR. Those workarounds in principle prevent us from
performing pipelined flips (and asynchronous flips across multiple
planes) and equally apply to the "known good" BCS ring. Given that it
already appears to work, and also appears to work with unmasking all 3
planes at once (and queuing flips across multiple planes), be brave.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=67600
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Lightly-tested-by: Stephane Marchesin <marchesin@icps.u-strasbg.fr>
Cc: Stephane Marchesin <marchesin@icps.u-strasbg.fr>
Cc: Ben Widawsky <ben@bwidawsk.net>
---
drivers/gpu/drm/i915/i915_reg.h | 18 ++++++++++++++++
drivers/gpu/drm/i915/intel_display.c | 40 ++++++++++++++++++++++++++++--------
2 files changed, 49 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c6f5009..df168f4 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -230,6 +230,7 @@
* address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
*/
#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*x-1)
+#define MI_STORE_REGISTER_MEM(x) MI_INSTR(0x24, 2*x-1)
#define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */
#define MI_FLUSH_DW_STORE_INDEX (1<<21)
#define MI_INVALIDATE_TLB (1<<18)
@@ -678,6 +679,23 @@
#define FPGA_DBG_RM_NOCLAIM (1<<31)
#define DERRMR 0x44050
+#define DERRMR_PIPEA_SCANLINE (1<<0)
+#define DERRMR_PIPEA_PRI_FLIP_DONE (1<<1)
+#define DERRMR_PIPEA_SPR_FLIP_DONE (1<<2)
+#define DERRMR_PIPEA_VBLANK (1<<3)
+#define DERRMR_PIPEA_HBLANK (1<<5)
+#define DERRMR_PIPEB_SCANLINE (1<<8)
+#define DERRMR_PIPEB_PRI_FLIP_DONE (1<<9)
+#define DERRMR_PIPEB_SPR_FLIP_DONE (1<<10)
+#define DERRMR_PIPEB_VBLANK (1<<11)
+#define DERRMR_PIPEB_HBLANK (1<<13)
+/* Note that PIPEC is not a simple translation of PIPEA/PIPEB */
+#define DERRMR_PIPEC_SCANLINE (1<<14)
+#define DERRMR_PIPEC_PRI_FLIP_DONE (1<<15)
+#define DERRMR_PIPEC_SPR_FLIP_DONE (1<<20)
+#define DERRMR_PIPEC_VBLANK (1<<21)
+#define DERRMR_PIPEC_HBLANK (1<<22)
+
/* GM45+ chicken bits -- debug workaround bits that may be required
* for various sorts of correct behavior. The top 16 bits of each are
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 9748dce..ffbcbd1 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -7826,12 +7826,6 @@ err:
return ret;
}
-/*
- * On gen7 we currently use the blit ring because (in early silicon at least)
- * the render ring doesn't give us interrpts for page flip completion, which
- * means clients will hang after the first flip is queued. Fortunately the
- * blit ring generates interrupts properly, so use it instead.
- */
static int intel_gen7_queue_flip(struct drm_device *dev,
struct drm_crtc *crtc,
struct drm_framebuffer *fb,
@@ -7839,9 +7833,13 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
- struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
+ struct intel_ring_buffer *ring;
uint32_t plane_bit = 0;
- int ret;
+ int len, ret;
+
+ ring = obj->ring;
+ if (ring == NULL || ring->id != RCS)
+ ring = &dev_priv->ring[BCS];
ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
if (ret)
@@ -7863,10 +7861,34 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
goto err_unpin;
}
- ret = intel_ring_begin(ring, 4);
+ len = 4;
+ if (ring->id == RCS)
+ len += 6;
+
+ ret = intel_ring_begin(ring, len);
if (ret)
goto err_unpin;
+ /* Unmask the flip-done completion message. Note that the bspec says that
+ * we should do this for both the BCS and RCS, and that we must not unmask
+ * more than one flip event at any time (or ensure that one flip message
+ * can be sent by waiting for flip-done prior to queueing new flips).
+ * Experimentation says that BCS works despite DERRMR masking all
+ * flip-done completion events and that unmasking all planes at once
+ * for the RCS also doesn't appear to drop events. Setting the DERRMR
+ * to zero does lead to lockups within MI_DISPLAY_FLIP.
+ */
+ if (ring->id == RCS) {
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit(ring, DERRMR);
+ intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
+ DERRMR_PIPEB_PRI_FLIP_DONE |
+ DERRMR_PIPEC_PRI_FLIP_DONE));
+ intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1));
+ intel_ring_emit(ring, DERRMR);
+ intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
+ }
+
intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode));
intel_ring_emit(ring, i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
--
1.8.4.rc3
^ permalink raw reply related [flat|nested] 12+ messages in thread* Re: [PATCH 2/2] drm/i915: Use RCS flips on Ivybridge+
2013-08-26 19:58 ` [PATCH 2/2] drm/i915: Use RCS flips on Ivybridge+ Chris Wilson
@ 2013-08-26 20:42 ` Stéphane Marchesin
2013-08-26 20:43 ` Stéphane Marchesin
2013-08-29 19:07 ` Daniel Vetter
1 sibling, 1 reply; 12+ messages in thread
From: Stéphane Marchesin @ 2013-08-26 20:42 UTC (permalink / raw)
To: Chris Wilson; +Cc: Intel Graphics Development, Ben Widawsky
[-- Attachment #1.1: Type: text/plain, Size: 6360 bytes --]
On Mon, Aug 26, 2013 at 12:58 PM, Chris Wilson <chris@chris-wilson.co.uk>wrote:
> RCS flips do work on Iybridge+ so long as we can unmask the messages
> through DERRMR. However, there are quite a few workarounds mentioned
> regarding unmasking more than one event or triggering more than one
> message through DERRMR. Those workarounds in principle prevent us from
> performing pipelined flips (and asynchronous flips across multiple
> planes) and equally apply to the "known good" BCS ring. Given that it
> already appears to work, and also appears to work with unmasking all 3
> planes at once (and queuing flips across multiple planes), be brave.
>
> Bugzlla: https://bugs.freedesktop.org/show_bug.cgi?id=67600
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Lightly-tested-by: Stephane Marchesin <marchesin@icps.u-strasbg.fr>
>
You can go ahead and say Tested-by, I ran my usual tests for 3 hours and it
didn't crash/show an issue. It would crash in ~10-30 minutes with the other
patches.
Stéphane
> Cc: Stephane Marchesin <marchesin@icps.u-strasbg.fr>
> Cc: Ben Widawsky <ben@bwidawsk.net>
> ---
> drivers/gpu/drm/i915/i915_reg.h | 18 ++++++++++++++++
> drivers/gpu/drm/i915/intel_display.c | 40
> ++++++++++++++++++++++++++++--------
> 2 files changed, 49 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h
> b/drivers/gpu/drm/i915/i915_reg.h
> index c6f5009..df168f4 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -230,6 +230,7 @@
> * address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
> */
> #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*x-1)
> +#define MI_STORE_REGISTER_MEM(x) MI_INSTR(0x24, 2*x-1)
> #define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */
> #define MI_FLUSH_DW_STORE_INDEX (1<<21)
> #define MI_INVALIDATE_TLB (1<<18)
> @@ -678,6 +679,23 @@
> #define FPGA_DBG_RM_NOCLAIM (1<<31)
>
> #define DERRMR 0x44050
> +#define DERRMR_PIPEA_SCANLINE (1<<0)
> +#define DERRMR_PIPEA_PRI_FLIP_DONE (1<<1)
> +#define DERRMR_PIPEA_SPR_FLIP_DONE (1<<2)
> +#define DERRMR_PIPEA_VBLANK (1<<3)
> +#define DERRMR_PIPEA_HBLANK (1<<5)
> +#define DERRMR_PIPEB_SCANLINE (1<<8)
> +#define DERRMR_PIPEB_PRI_FLIP_DONE (1<<9)
> +#define DERRMR_PIPEB_SPR_FLIP_DONE (1<<10)
> +#define DERRMR_PIPEB_VBLANK (1<<11)
> +#define DERRMR_PIPEB_HBLANK (1<<13)
> +/* Note that PIPEC is not a simple translation of PIPEA/PIPEB */
> +#define DERRMR_PIPEC_SCANLINE (1<<14)
> +#define DERRMR_PIPEC_PRI_FLIP_DONE (1<<15)
> +#define DERRMR_PIPEC_SPR_FLIP_DONE (1<<20)
> +#define DERRMR_PIPEC_VBLANK (1<<21)
> +#define DERRMR_PIPEC_HBLANK (1<<22)
> +
>
> /* GM45+ chicken bits -- debug workaround bits that may be required
> * for various sorts of correct behavior. The top 16 bits of each are
> diff --git a/drivers/gpu/drm/i915/intel_display.c
> b/drivers/gpu/drm/i915/intel_display.c
> index 9748dce..ffbcbd1 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -7826,12 +7826,6 @@ err:
> return ret;
> }
>
> -/*
> - * On gen7 we currently use the blit ring because (in early silicon at
> least)
> - * the render ring doesn't give us interrpts for page flip completion,
> which
> - * means clients will hang after the first flip is queued. Fortunately
> the
> - * blit ring generates interrupts properly, so use it instead.
> - */
> static int intel_gen7_queue_flip(struct drm_device *dev,
> struct drm_crtc *crtc,
> struct drm_framebuffer *fb,
> @@ -7839,9 +7833,13 @@ static int intel_gen7_queue_flip(struct drm_device
> *dev,
> {
> struct drm_i915_private *dev_priv = dev->dev_private;
> struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> - struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
> + struct intel_ring_buffer *ring;
> uint32_t plane_bit = 0;
> - int ret;
> + int len, ret;
> +
> + ring = obj->ring;
> + if (ring == NULL || ring->id != RCS)
> + ring = &dev_priv->ring[BCS];
>
> ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
> if (ret)
> @@ -7863,10 +7861,34 @@ static int intel_gen7_queue_flip(struct drm_device
> *dev,
> goto err_unpin;
> }
>
> - ret = intel_ring_begin(ring, 4);
> + len = 4;
> + if (ring->id == RCS)
> + len += 6;
> +
> + ret = intel_ring_begin(ring, len);
> if (ret)
> goto err_unpin;
>
> + /* Unmask the flip-done completion message. Note that the bspec
> says that
> + * we should do this for both the BCS and RCS, and that we must
> not unmask
> + * more than one flip event at any time (or ensure that one flip
> message
> + * can be sent by waiting for flip-done prior to queueing new
> flips).
> + * Experimentation says that BCS works despite DERRMR masking all
> + * flip-done completion events and that unmasking all planes at
> once
> + * for the RCS also doesn't appear to drop events. Setting the
> DERRMR
> + * to zero does lead to lockups within MI_DISPLAY_FLIP.
> + */
> + if (ring->id == RCS) {
> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> + intel_ring_emit(ring, DERRMR);
> + intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
> + DERRMR_PIPEB_PRI_FLIP_DONE |
> + DERRMR_PIPEC_PRI_FLIP_DONE));
> + intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1));
> + intel_ring_emit(ring, DERRMR);
> + intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
> + }
> +
> intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
> intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode));
> intel_ring_emit(ring, i915_gem_obj_ggtt_offset(obj) +
> intel_crtc->dspaddr_offset);
> --
> 1.8.4.rc3
>
[-- Attachment #1.2: Type: text/html, Size: 7572 bytes --]
[-- Attachment #2: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH 2/2] drm/i915: Use RCS flips on Ivybridge+
2013-08-26 20:42 ` Stéphane Marchesin
@ 2013-08-26 20:43 ` Stéphane Marchesin
2013-08-26 20:49 ` Chris Wilson
2013-08-29 17:11 ` Stéphane Marchesin
0 siblings, 2 replies; 12+ messages in thread
From: Stéphane Marchesin @ 2013-08-26 20:43 UTC (permalink / raw)
To: Chris Wilson; +Cc: Intel Graphics Development, Ben Widawsky
[-- Attachment #1.1: Type: text/plain, Size: 6712 bytes --]
On Mon, Aug 26, 2013 at 1:42 PM, Stéphane Marchesin <
stephane.marchesin@gmail.com> wrote:
>
>
>
> On Mon, Aug 26, 2013 at 12:58 PM, Chris Wilson <chris@chris-wilson.co.uk>wrote:
>
>> RCS flips do work on Iybridge+ so long as we can unmask the messages
>> through DERRMR. However, there are quite a few workarounds mentioned
>> regarding unmasking more than one event or triggering more than one
>> message through DERRMR. Those workarounds in principle prevent us from
>> performing pipelined flips (and asynchronous flips across multiple
>> planes) and equally apply to the "known good" BCS ring. Given that it
>> already appears to work, and also appears to work with unmasking all 3
>> planes at once (and queuing flips across multiple planes), be brave.
>>
>> Bugzlla: https://bugs.freedesktop.org/show_bug.cgi?id=67600
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Lightly-tested-by: Stephane Marchesin <marchesin@icps.u-strasbg.fr>
>>
>
> You can go ahead and say Tested-by, I ran my usual tests for 3 hours and
> it didn't crash/show an issue. It would crash in ~10-30 minutes with the
> other patches.
>
Oh actually this one is a bit different... Give me 3 hours :)
Stéphane
>
> Stéphane
>
>
>> Cc: Stephane Marchesin <marchesin@icps.u-strasbg.fr>
>> Cc: Ben Widawsky <ben@bwidawsk.net>
>> ---
>> drivers/gpu/drm/i915/i915_reg.h | 18 ++++++++++++++++
>> drivers/gpu/drm/i915/intel_display.c | 40
>> ++++++++++++++++++++++++++++--------
>> 2 files changed, 49 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h
>> b/drivers/gpu/drm/i915/i915_reg.h
>> index c6f5009..df168f4 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -230,6 +230,7 @@
>> * address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
>> */
>> #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*x-1)
>> +#define MI_STORE_REGISTER_MEM(x) MI_INSTR(0x24, 2*x-1)
>> #define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */
>> #define MI_FLUSH_DW_STORE_INDEX (1<<21)
>> #define MI_INVALIDATE_TLB (1<<18)
>> @@ -678,6 +679,23 @@
>> #define FPGA_DBG_RM_NOCLAIM (1<<31)
>>
>> #define DERRMR 0x44050
>> +#define DERRMR_PIPEA_SCANLINE (1<<0)
>> +#define DERRMR_PIPEA_PRI_FLIP_DONE (1<<1)
>> +#define DERRMR_PIPEA_SPR_FLIP_DONE (1<<2)
>> +#define DERRMR_PIPEA_VBLANK (1<<3)
>> +#define DERRMR_PIPEA_HBLANK (1<<5)
>> +#define DERRMR_PIPEB_SCANLINE (1<<8)
>> +#define DERRMR_PIPEB_PRI_FLIP_DONE (1<<9)
>> +#define DERRMR_PIPEB_SPR_FLIP_DONE (1<<10)
>> +#define DERRMR_PIPEB_VBLANK (1<<11)
>> +#define DERRMR_PIPEB_HBLANK (1<<13)
>> +/* Note that PIPEC is not a simple translation of PIPEA/PIPEB */
>> +#define DERRMR_PIPEC_SCANLINE (1<<14)
>> +#define DERRMR_PIPEC_PRI_FLIP_DONE (1<<15)
>> +#define DERRMR_PIPEC_SPR_FLIP_DONE (1<<20)
>> +#define DERRMR_PIPEC_VBLANK (1<<21)
>> +#define DERRMR_PIPEC_HBLANK (1<<22)
>> +
>>
>> /* GM45+ chicken bits -- debug workaround bits that may be required
>> * for various sorts of correct behavior. The top 16 bits of each are
>> diff --git a/drivers/gpu/drm/i915/intel_display.c
>> b/drivers/gpu/drm/i915/intel_display.c
>> index 9748dce..ffbcbd1 100644
>> --- a/drivers/gpu/drm/i915/intel_display.c
>> +++ b/drivers/gpu/drm/i915/intel_display.c
>> @@ -7826,12 +7826,6 @@ err:
>> return ret;
>> }
>>
>> -/*
>> - * On gen7 we currently use the blit ring because (in early silicon at
>> least)
>> - * the render ring doesn't give us interrpts for page flip completion,
>> which
>> - * means clients will hang after the first flip is queued. Fortunately
>> the
>> - * blit ring generates interrupts properly, so use it instead.
>> - */
>> static int intel_gen7_queue_flip(struct drm_device *dev,
>> struct drm_crtc *crtc,
>> struct drm_framebuffer *fb,
>> @@ -7839,9 +7833,13 @@ static int intel_gen7_queue_flip(struct drm_device
>> *dev,
>> {
>> struct drm_i915_private *dev_priv = dev->dev_private;
>> struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>> - struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
>> + struct intel_ring_buffer *ring;
>> uint32_t plane_bit = 0;
>> - int ret;
>> + int len, ret;
>> +
>> + ring = obj->ring;
>> + if (ring == NULL || ring->id != RCS)
>> + ring = &dev_priv->ring[BCS];
>>
>> ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
>> if (ret)
>> @@ -7863,10 +7861,34 @@ static int intel_gen7_queue_flip(struct
>> drm_device *dev,
>> goto err_unpin;
>> }
>>
>> - ret = intel_ring_begin(ring, 4);
>> + len = 4;
>> + if (ring->id == RCS)
>> + len += 6;
>> +
>> + ret = intel_ring_begin(ring, len);
>> if (ret)
>> goto err_unpin;
>>
>> + /* Unmask the flip-done completion message. Note that the bspec
>> says that
>> + * we should do this for both the BCS and RCS, and that we must
>> not unmask
>> + * more than one flip event at any time (or ensure that one flip
>> message
>> + * can be sent by waiting for flip-done prior to queueing new
>> flips).
>> + * Experimentation says that BCS works despite DERRMR masking all
>> + * flip-done completion events and that unmasking all planes at
>> once
>> + * for the RCS also doesn't appear to drop events. Setting the
>> DERRMR
>> + * to zero does lead to lockups within MI_DISPLAY_FLIP.
>> + */
>> + if (ring->id == RCS) {
>> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> + intel_ring_emit(ring, DERRMR);
>> + intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
>> + DERRMR_PIPEB_PRI_FLIP_DONE |
>> + DERRMR_PIPEC_PRI_FLIP_DONE));
>> + intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1));
>> + intel_ring_emit(ring, DERRMR);
>> + intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
>> + }
>> +
>> intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
>> intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode));
>> intel_ring_emit(ring, i915_gem_obj_ggtt_offset(obj) +
>> intel_crtc->dspaddr_offset);
>> --
>> 1.8.4.rc3
>>
>
>
[-- Attachment #1.2: Type: text/html, Size: 8480 bytes --]
[-- Attachment #2: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH 2/2] drm/i915: Use RCS flips on Ivybridge+
2013-08-26 20:43 ` Stéphane Marchesin
@ 2013-08-26 20:49 ` Chris Wilson
2013-08-29 17:11 ` Stéphane Marchesin
1 sibling, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2013-08-26 20:49 UTC (permalink / raw)
To: Stéphane Marchesin; +Cc: Intel Graphics Development, Ben Widawsky
On Mon, Aug 26, 2013 at 01:43:25PM -0700, Stéphane Marchesin wrote:
> On Mon, Aug 26, 2013 at 1:42 PM, Stéphane Marchesin
> <[1]stephane.marchesin@gmail.com> wrote:
>
> On Mon, Aug 26, 2013 at 12:58 PM, Chris Wilson
> <[2]chris@chris-wilson.co.uk> wrote:
>
> RCS flips do work on Iybridge+ so long as we can unmask the messages
> through DERRMR. However, there are quite a few workarounds mentioned
> regarding unmasking more than one event or triggering more than one
> message through DERRMR. Those workarounds in principle prevent us from
> performing pipelined flips (and asynchronous flips across multiple
> planes) and equally apply to the "known good" BCS ring. Given that it
> already appears to work, and also appears to work with unmasking all 3
> planes at once (and queuing flips across multiple planes), be brave.
>
> Bugzlla: [3]https://bugs.freedesktop.org/show_bug.cgi?id=67600
> Signed-off-by: Chris Wilson <[4]chris@chris-wilson.co.uk>
> Lightly-tested-by: Stephane Marchesin <[5]marchesin@icps.u-strasbg.fr>
>
> You can go ahead and say Tested-by, I ran my usual tests for 3 hours and
> it didn't crash/show an issue. It would crash in ~10-30 minutes with the
> other patches.
>
> Oh actually this one is a bit different... Give me 3 hours :)
> Stéphane
I should have mentioned this was v2, and your testing was for v1. Hopefully,
I've moved the code around correctly.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] drm/i915: Use RCS flips on Ivybridge+
2013-08-26 20:43 ` Stéphane Marchesin
2013-08-26 20:49 ` Chris Wilson
@ 2013-08-29 17:11 ` Stéphane Marchesin
1 sibling, 0 replies; 12+ messages in thread
From: Stéphane Marchesin @ 2013-08-29 17:11 UTC (permalink / raw)
To: Stéphane Marchesin; +Cc: Intel Graphics Development, Ben Widawsky
On Mon, Aug 26, 2013 at 1:43 PM, Stéphane Marchesin
<stephane.marchesin@gmail.com> wrote:
>
>
>
> On Mon, Aug 26, 2013 at 1:42 PM, Stéphane Marchesin
> <stephane.marchesin@gmail.com> wrote:
>>
>>
>>
>>
>> On Mon, Aug 26, 2013 at 12:58 PM, Chris Wilson <chris@chris-wilson.co.uk>
>> wrote:
>>>
>>> RCS flips do work on Iybridge+ so long as we can unmask the messages
>>> through DERRMR. However, there are quite a few workarounds mentioned
>>> regarding unmasking more than one event or triggering more than one
>>> message through DERRMR. Those workarounds in principle prevent us from
>>> performing pipelined flips (and asynchronous flips across multiple
>>> planes) and equally apply to the "known good" BCS ring. Given that it
>>> already appears to work, and also appears to work with unmasking all 3
>>> planes at once (and queuing flips across multiple planes), be brave.
>>>
>>> Bugzlla: https://bugs.freedesktop.org/show_bug.cgi?id=67600
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Lightly-tested-by: Stephane Marchesin <marchesin@icps.u-strasbg.fr>
>>
>>
>> You can go ahead and say Tested-by, I ran my usual tests for 3 hours and
>> it didn't crash/show an issue. It would crash in ~10-30 minutes with the
>> other patches.
>
>
> Oh actually this one is a bit different... Give me 3 hours :)
>
Tested-by: Stéphane Marchesin <marcheu@chromium.org>
> Stéphane
>
>>
>>
>> Stéphane
>>
>>>
>>> Cc: Stephane Marchesin <marchesin@icps.u-strasbg.fr>
>>> Cc: Ben Widawsky <ben@bwidawsk.net>
>>> ---
>>> drivers/gpu/drm/i915/i915_reg.h | 18 ++++++++++++++++
>>> drivers/gpu/drm/i915/intel_display.c | 40
>>> ++++++++++++++++++++++++++++--------
>>> 2 files changed, 49 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_reg.h
>>> b/drivers/gpu/drm/i915/i915_reg.h
>>> index c6f5009..df168f4 100644
>>> --- a/drivers/gpu/drm/i915/i915_reg.h
>>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>>> @@ -230,6 +230,7 @@
>>> * address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
>>> */
>>> #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*x-1)
>>> +#define MI_STORE_REGISTER_MEM(x) MI_INSTR(0x24, 2*x-1)
>>> #define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */
>>> #define MI_FLUSH_DW_STORE_INDEX (1<<21)
>>> #define MI_INVALIDATE_TLB (1<<18)
>>> @@ -678,6 +679,23 @@
>>> #define FPGA_DBG_RM_NOCLAIM (1<<31)
>>>
>>> #define DERRMR 0x44050
>>> +#define DERRMR_PIPEA_SCANLINE (1<<0)
>>> +#define DERRMR_PIPEA_PRI_FLIP_DONE (1<<1)
>>> +#define DERRMR_PIPEA_SPR_FLIP_DONE (1<<2)
>>> +#define DERRMR_PIPEA_VBLANK (1<<3)
>>> +#define DERRMR_PIPEA_HBLANK (1<<5)
>>> +#define DERRMR_PIPEB_SCANLINE (1<<8)
>>> +#define DERRMR_PIPEB_PRI_FLIP_DONE (1<<9)
>>> +#define DERRMR_PIPEB_SPR_FLIP_DONE (1<<10)
>>> +#define DERRMR_PIPEB_VBLANK (1<<11)
>>> +#define DERRMR_PIPEB_HBLANK (1<<13)
>>> +/* Note that PIPEC is not a simple translation of PIPEA/PIPEB */
>>> +#define DERRMR_PIPEC_SCANLINE (1<<14)
>>> +#define DERRMR_PIPEC_PRI_FLIP_DONE (1<<15)
>>> +#define DERRMR_PIPEC_SPR_FLIP_DONE (1<<20)
>>> +#define DERRMR_PIPEC_VBLANK (1<<21)
>>> +#define DERRMR_PIPEC_HBLANK (1<<22)
>>> +
>>>
>>> /* GM45+ chicken bits -- debug workaround bits that may be required
>>> * for various sorts of correct behavior. The top 16 bits of each are
>>> diff --git a/drivers/gpu/drm/i915/intel_display.c
>>> b/drivers/gpu/drm/i915/intel_display.c
>>> index 9748dce..ffbcbd1 100644
>>> --- a/drivers/gpu/drm/i915/intel_display.c
>>> +++ b/drivers/gpu/drm/i915/intel_display.c
>>> @@ -7826,12 +7826,6 @@ err:
>>> return ret;
>>> }
>>>
>>> -/*
>>> - * On gen7 we currently use the blit ring because (in early silicon at
>>> least)
>>> - * the render ring doesn't give us interrpts for page flip completion,
>>> which
>>> - * means clients will hang after the first flip is queued. Fortunately
>>> the
>>> - * blit ring generates interrupts properly, so use it instead.
>>> - */
>>> static int intel_gen7_queue_flip(struct drm_device *dev,
>>> struct drm_crtc *crtc,
>>> struct drm_framebuffer *fb,
>>> @@ -7839,9 +7833,13 @@ static int intel_gen7_queue_flip(struct drm_device
>>> *dev,
>>> {
>>> struct drm_i915_private *dev_priv = dev->dev_private;
>>> struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>>> - struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
>>> + struct intel_ring_buffer *ring;
>>> uint32_t plane_bit = 0;
>>> - int ret;
>>> + int len, ret;
>>> +
>>> + ring = obj->ring;
>>> + if (ring == NULL || ring->id != RCS)
>>> + ring = &dev_priv->ring[BCS];
>>>
>>> ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
>>> if (ret)
>>> @@ -7863,10 +7861,34 @@ static int intel_gen7_queue_flip(struct
>>> drm_device *dev,
>>> goto err_unpin;
>>> }
>>>
>>> - ret = intel_ring_begin(ring, 4);
>>> + len = 4;
>>> + if (ring->id == RCS)
>>> + len += 6;
>>> +
>>> + ret = intel_ring_begin(ring, len);
>>> if (ret)
>>> goto err_unpin;
>>>
>>> + /* Unmask the flip-done completion message. Note that the bspec
>>> says that
>>> + * we should do this for both the BCS and RCS, and that we must
>>> not unmask
>>> + * more than one flip event at any time (or ensure that one flip
>>> message
>>> + * can be sent by waiting for flip-done prior to queueing new
>>> flips).
>>> + * Experimentation says that BCS works despite DERRMR masking all
>>> + * flip-done completion events and that unmasking all planes at
>>> once
>>> + * for the RCS also doesn't appear to drop events. Setting the
>>> DERRMR
>>> + * to zero does lead to lockups within MI_DISPLAY_FLIP.
>>> + */
>>> + if (ring->id == RCS) {
>>> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>>> + intel_ring_emit(ring, DERRMR);
>>> + intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
>>> + DERRMR_PIPEB_PRI_FLIP_DONE |
>>> + DERRMR_PIPEC_PRI_FLIP_DONE));
>>> + intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1));
>>> + intel_ring_emit(ring, DERRMR);
>>> + intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
>>> + }
>>> +
>>> intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
>>> intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode));
>>> intel_ring_emit(ring, i915_gem_obj_ggtt_offset(obj) +
>>> intel_crtc->dspaddr_offset);
>>> --
>>> 1.8.4.rc3
>>
>>
>
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] drm/i915: Use RCS flips on Ivybridge+
2013-08-26 19:58 ` [PATCH 2/2] drm/i915: Use RCS flips on Ivybridge+ Chris Wilson
2013-08-26 20:42 ` Stéphane Marchesin
@ 2013-08-29 19:07 ` Daniel Vetter
2013-09-04 9:28 ` Lee, Chon Ming
1 sibling, 1 reply; 12+ messages in thread
From: Daniel Vetter @ 2013-08-29 19:07 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx, Stephane Marchesin, Ben Widawsky
On Mon, Aug 26, 2013 at 08:58:12PM +0100, Chris Wilson wrote:
> RCS flips do work on Iybridge+ so long as we can unmask the messages
> through DERRMR. However, there are quite a few workarounds mentioned
> regarding unmasking more than one event or triggering more than one
> message through DERRMR. Those workarounds in principle prevent us from
> performing pipelined flips (and asynchronous flips across multiple
> planes) and equally apply to the "known good" BCS ring. Given that it
> already appears to work, and also appears to work with unmasking all 3
> planes at once (and queuing flips across multiple planes), be brave.
>
> Bugzlla: https://bugs.freedesktop.org/show_bug.cgi?id=67600
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Lightly-tested-by: Stephane Marchesin <marchesin@icps.u-strasbg.fr>
> Cc: Stephane Marchesin <marchesin@icps.u-strasbg.fr>
> Cc: Ben Widawsky <ben@bwidawsk.net>
Both patches merged to dinq.
-Daniel
> ---
> drivers/gpu/drm/i915/i915_reg.h | 18 ++++++++++++++++
> drivers/gpu/drm/i915/intel_display.c | 40 ++++++++++++++++++++++++++++--------
> 2 files changed, 49 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index c6f5009..df168f4 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -230,6 +230,7 @@
> * address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
> */
> #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*x-1)
> +#define MI_STORE_REGISTER_MEM(x) MI_INSTR(0x24, 2*x-1)
> #define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */
> #define MI_FLUSH_DW_STORE_INDEX (1<<21)
> #define MI_INVALIDATE_TLB (1<<18)
> @@ -678,6 +679,23 @@
> #define FPGA_DBG_RM_NOCLAIM (1<<31)
>
> #define DERRMR 0x44050
> +#define DERRMR_PIPEA_SCANLINE (1<<0)
> +#define DERRMR_PIPEA_PRI_FLIP_DONE (1<<1)
> +#define DERRMR_PIPEA_SPR_FLIP_DONE (1<<2)
> +#define DERRMR_PIPEA_VBLANK (1<<3)
> +#define DERRMR_PIPEA_HBLANK (1<<5)
> +#define DERRMR_PIPEB_SCANLINE (1<<8)
> +#define DERRMR_PIPEB_PRI_FLIP_DONE (1<<9)
> +#define DERRMR_PIPEB_SPR_FLIP_DONE (1<<10)
> +#define DERRMR_PIPEB_VBLANK (1<<11)
> +#define DERRMR_PIPEB_HBLANK (1<<13)
> +/* Note that PIPEC is not a simple translation of PIPEA/PIPEB */
> +#define DERRMR_PIPEC_SCANLINE (1<<14)
> +#define DERRMR_PIPEC_PRI_FLIP_DONE (1<<15)
> +#define DERRMR_PIPEC_SPR_FLIP_DONE (1<<20)
> +#define DERRMR_PIPEC_VBLANK (1<<21)
> +#define DERRMR_PIPEC_HBLANK (1<<22)
> +
>
> /* GM45+ chicken bits -- debug workaround bits that may be required
> * for various sorts of correct behavior. The top 16 bits of each are
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 9748dce..ffbcbd1 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -7826,12 +7826,6 @@ err:
> return ret;
> }
>
> -/*
> - * On gen7 we currently use the blit ring because (in early silicon at least)
> - * the render ring doesn't give us interrpts for page flip completion, which
> - * means clients will hang after the first flip is queued. Fortunately the
> - * blit ring generates interrupts properly, so use it instead.
> - */
> static int intel_gen7_queue_flip(struct drm_device *dev,
> struct drm_crtc *crtc,
> struct drm_framebuffer *fb,
> @@ -7839,9 +7833,13 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
> {
> struct drm_i915_private *dev_priv = dev->dev_private;
> struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> - struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
> + struct intel_ring_buffer *ring;
> uint32_t plane_bit = 0;
> - int ret;
> + int len, ret;
> +
> + ring = obj->ring;
> + if (ring == NULL || ring->id != RCS)
> + ring = &dev_priv->ring[BCS];
>
> ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
> if (ret)
> @@ -7863,10 +7861,34 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
> goto err_unpin;
> }
>
> - ret = intel_ring_begin(ring, 4);
> + len = 4;
> + if (ring->id == RCS)
> + len += 6;
> +
> + ret = intel_ring_begin(ring, len);
> if (ret)
> goto err_unpin;
>
> + /* Unmask the flip-done completion message. Note that the bspec says that
> + * we should do this for both the BCS and RCS, and that we must not unmask
> + * more than one flip event at any time (or ensure that one flip message
> + * can be sent by waiting for flip-done prior to queueing new flips).
> + * Experimentation says that BCS works despite DERRMR masking all
> + * flip-done completion events and that unmasking all planes at once
> + * for the RCS also doesn't appear to drop events. Setting the DERRMR
> + * to zero does lead to lockups within MI_DISPLAY_FLIP.
> + */
> + if (ring->id == RCS) {
> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> + intel_ring_emit(ring, DERRMR);
> + intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
> + DERRMR_PIPEB_PRI_FLIP_DONE |
> + DERRMR_PIPEC_PRI_FLIP_DONE));
> + intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1));
> + intel_ring_emit(ring, DERRMR);
> + intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
> + }
> +
> intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
> intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode));
> intel_ring_emit(ring, i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
> --
> 1.8.4.rc3
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] drm/i915: Use RCS flips on Ivybridge+
2013-08-29 19:07 ` Daniel Vetter
@ 2013-09-04 9:28 ` Lee, Chon Ming
2013-09-04 9:41 ` Daniel Vetter
2013-09-04 9:49 ` Chris Wilson
0 siblings, 2 replies; 12+ messages in thread
From: Lee, Chon Ming @ 2013-09-04 9:28 UTC (permalink / raw)
To: Daniel Vetter; +Cc: intel-gfx, Stephane Marchesin, Ben Widawsky
This patch causes a hang on VLV; it looks like a ring buffer lockup.
This is the message.
[drm] stuck on render ring
I haven't looked at the bspec for the differences between VLV and Ivybridge on
this yet. Just checking whether anyone has a clue why this might fail on VLV.
On 08/29 21:07, Daniel Vetter wrote:
> On Mon, Aug 26, 2013 at 08:58:12PM +0100, Chris Wilson wrote:
> > RCS flips do work on Iybridge+ so long as we can unmask the messages
> > through DERRMR. However, there are quite a few workarounds mentioned
> > regarding unmasking more than one event or triggering more than one
> > message through DERRMR. Those workarounds in principle prevent us from
> > performing pipelined flips (and asynchronous flips across multiple
> > planes) and equally apply to the "known good" BCS ring. Given that it
> > already appears to work, and also appears to work with unmasking all 3
> > planes at once (and queuing flips across multiple planes), be brave.
> >
> > Bugzlla: https://bugs.freedesktop.org/show_bug.cgi?id=67600
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Lightly-tested-by: Stephane Marchesin <marchesin@icps.u-strasbg.fr>
> > Cc: Stephane Marchesin <marchesin@icps.u-strasbg.fr>
> > Cc: Ben Widawsky <ben@bwidawsk.net>
>
> Both patches merged to dinq.
> -Daniel
>
> > ---
> > drivers/gpu/drm/i915/i915_reg.h | 18 ++++++++++++++++
> > drivers/gpu/drm/i915/intel_display.c | 40 ++++++++++++++++++++++++++++--------
> > 2 files changed, 49 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> > index c6f5009..df168f4 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -230,6 +230,7 @@
> > * address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
> > */
> > #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*x-1)
> > +#define MI_STORE_REGISTER_MEM(x) MI_INSTR(0x24, 2*x-1)
> > #define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */
> > #define MI_FLUSH_DW_STORE_INDEX (1<<21)
> > #define MI_INVALIDATE_TLB (1<<18)
> > @@ -678,6 +679,23 @@
> > #define FPGA_DBG_RM_NOCLAIM (1<<31)
> >
> > #define DERRMR 0x44050
> > +#define DERRMR_PIPEA_SCANLINE (1<<0)
> > +#define DERRMR_PIPEA_PRI_FLIP_DONE (1<<1)
> > +#define DERRMR_PIPEA_SPR_FLIP_DONE (1<<2)
> > +#define DERRMR_PIPEA_VBLANK (1<<3)
> > +#define DERRMR_PIPEA_HBLANK (1<<5)
> > +#define DERRMR_PIPEB_SCANLINE (1<<8)
> > +#define DERRMR_PIPEB_PRI_FLIP_DONE (1<<9)
> > +#define DERRMR_PIPEB_SPR_FLIP_DONE (1<<10)
> > +#define DERRMR_PIPEB_VBLANK (1<<11)
> > +#define DERRMR_PIPEB_HBLANK (1<<13)
> > +/* Note that PIPEC is not a simple translation of PIPEA/PIPEB */
> > +#define DERRMR_PIPEC_SCANLINE (1<<14)
> > +#define DERRMR_PIPEC_PRI_FLIP_DONE (1<<15)
> > +#define DERRMR_PIPEC_SPR_FLIP_DONE (1<<20)
> > +#define DERRMR_PIPEC_VBLANK (1<<21)
> > +#define DERRMR_PIPEC_HBLANK (1<<22)
> > +
> >
> > /* GM45+ chicken bits -- debug workaround bits that may be required
> > * for various sorts of correct behavior. The top 16 bits of each are
> > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> > index 9748dce..ffbcbd1 100644
> > --- a/drivers/gpu/drm/i915/intel_display.c
> > +++ b/drivers/gpu/drm/i915/intel_display.c
> > @@ -7826,12 +7826,6 @@ err:
> > return ret;
> > }
> >
> > -/*
> > - * On gen7 we currently use the blit ring because (in early silicon at least)
> > - * the render ring doesn't give us interrpts for page flip completion, which
> > - * means clients will hang after the first flip is queued. Fortunately the
> > - * blit ring generates interrupts properly, so use it instead.
> > - */
> > static int intel_gen7_queue_flip(struct drm_device *dev,
> > struct drm_crtc *crtc,
> > struct drm_framebuffer *fb,
> > @@ -7839,9 +7833,13 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
> > {
> > struct drm_i915_private *dev_priv = dev->dev_private;
> > struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> > - struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
> > + struct intel_ring_buffer *ring;
> > uint32_t plane_bit = 0;
> > - int ret;
> > + int len, ret;
> > +
> > + ring = obj->ring;
> > + if (ring == NULL || ring->id != RCS)
> > + ring = &dev_priv->ring[BCS];
> >
> > ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
> > if (ret)
> > @@ -7863,10 +7861,34 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
> > goto err_unpin;
> > }
> >
> > - ret = intel_ring_begin(ring, 4);
> > + len = 4;
> > + if (ring->id == RCS)
> > + len += 6;
> > +
> > + ret = intel_ring_begin(ring, len);
> > if (ret)
> > goto err_unpin;
> >
> > + /* Unmask the flip-done completion message. Note that the bspec says that
> > + * we should do this for both the BCS and RCS, and that we must not unmask
> > + * more than one flip event at any time (or ensure that one flip message
> > + * can be sent by waiting for flip-done prior to queueing new flips).
> > + * Experimentation says that BCS works despite DERRMR masking all
> > + * flip-done completion events and that unmasking all planes at once
> > + * for the RCS also doesn't appear to drop events. Setting the DERRMR
> > + * to zero does lead to lockups within MI_DISPLAY_FLIP.
> > + */
> > + if (ring->id == RCS) {
> > + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> > + intel_ring_emit(ring, DERRMR);
> > + intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
> > + DERRMR_PIPEB_PRI_FLIP_DONE |
> > + DERRMR_PIPEC_PRI_FLIP_DONE));
> > + intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1));
> > + intel_ring_emit(ring, DERRMR);
> > + intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
> > + }
> > +
> > intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
> > intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode));
> > intel_ring_emit(ring, i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
> > --
> > 1.8.4.rc3
> >
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] drm/i915: Use RCS flips on Ivybridge+
2013-09-04 9:28 ` Lee, Chon Ming
@ 2013-09-04 9:41 ` Daniel Vetter
2013-09-04 9:49 ` Ville Syrjälä
2013-09-04 9:49 ` Lee, Chon Ming
2013-09-04 9:49 ` Chris Wilson
1 sibling, 2 replies; 12+ messages in thread
From: Daniel Vetter @ 2013-09-04 9:41 UTC (permalink / raw)
To: Lee, Chon Ming; +Cc: intel-gfx, Stephane Marchesin, Ben Widawsky
On Wed, Sep 4, 2013 at 11:28 AM, Lee, Chon Ming <chon.ming.lee@intel.com> wrote:
> This patch causes VLV hang, look like ring buffer lockup.
>
> This is the message.
> [drm] stuck on render ring
>
> I haven't look at the bspec for the different between VLV and Ivybridge on this
> yet. Just see anyone have any clue why this might fail in VLV.
I guess there's no DERRMR register on vlv? If that's the case I think
we just need to disable rcs flips on vlv.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] drm/i915: Use RCS flips on Ivybridge+
2013-09-04 9:41 ` Daniel Vetter
@ 2013-09-04 9:49 ` Ville Syrjälä
2013-09-04 9:49 ` Lee, Chon Ming
1 sibling, 0 replies; 12+ messages in thread
From: Ville Syrjälä @ 2013-09-04 9:49 UTC (permalink / raw)
To: Daniel Vetter; +Cc: intel-gfx, Stephane Marchesin, Ben Widawsky
On Wed, Sep 04, 2013 at 11:41:57AM +0200, Daniel Vetter wrote:
> On Wed, Sep 4, 2013 at 11:28 AM, Lee, Chon Ming <chon.ming.lee@intel.com> wrote:
> > This patch causes VLV hang, look like ring buffer lockup.
> >
> > This is the message.
> > [drm] stuck on render ring
> >
> > I haven't look at the bspec for the different between VLV and Ivybridge on this
> > yet. Just see anyone have any clue why this might fail in VLV.
>
> I guess there's no DERRMR register on vlv? If that's the case I think
> we just need to disable rcs flips on vlv.
There's the DPFLIPSTAT register. But my understanding was that it only
affects flip done signalling for MI_WAIT stuff. Worth a shot maybe
anyway.
--
Ville Syrjälä
Intel OTC
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] drm/i915: Use RCS flips on Ivybridge+
2013-09-04 9:41 ` Daniel Vetter
2013-09-04 9:49 ` Ville Syrjälä
@ 2013-09-04 9:49 ` Lee, Chon Ming
1 sibling, 0 replies; 12+ messages in thread
From: Lee, Chon Ming @ 2013-09-04 9:49 UTC (permalink / raw)
To: Daniel Vetter; +Cc: intel-gfx, Stephane Marchesin, Ben Widawsky
DERRMR is documented in vlv bspec.
Regards,
Chon Ming
On Sep 4, 2013, at 5:42 PM, "Daniel Vetter" <daniel@ffwll.ch> wrote:
> On Wed, Sep 4, 2013 at 11:28 AM, Lee, Chon Ming <chon.ming.lee@intel.com> wrote:
>> This patch causes VLV hang, look like ring buffer lockup.
>>
>> This is the message.
>> [drm] stuck on render ring
>>
>> I haven't look at the bspec for the different between VLV and Ivybridge on this
>> yet. Just see anyone have any clue why this might fail in VLV.
>
> I guess there's no DERRMR register on vlv? If that's the case I think
> we just need to disable rcs flips on vlv.
> -Daniel
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] drm/i915: Use RCS flips on Ivybridge+
2013-09-04 9:28 ` Lee, Chon Ming
2013-09-04 9:41 ` Daniel Vetter
@ 2013-09-04 9:49 ` Chris Wilson
1 sibling, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2013-09-04 9:49 UTC (permalink / raw)
To: Lee, Chon Ming; +Cc: intel-gfx, Stephane Marchesin, Ben Widawsky
On Wed, Sep 04, 2013 at 05:28:22PM +0800, Lee, Chon Ming wrote:
> This patch causes VLV hang, look like ring buffer lockup.
>
> This is the message.
> [drm] stuck on render ring
>
> I haven't look at the bspec for the different between VLV and Ivybridge on this
> yet. Just see anyone have any clue why this might fail in VLV.
Hmm, vsync on vlv behaves like snb not ivb, so flipping may be snb as
well? Bspec confirms, MI_DISPLAY_FLIP is invalid for RCS on VLV.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2013-09-04 9:50 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-08-26 19:58 [PATCH 1/2] drm/i915: Embed the ring->private within the struct intel_ring_buffer Chris Wilson
2013-08-26 19:58 ` [PATCH 2/2] drm/i915: Use RCS flips on Ivybridge+ Chris Wilson
2013-08-26 20:42 ` Stéphane Marchesin
2013-08-26 20:43 ` Stéphane Marchesin
2013-08-26 20:49 ` Chris Wilson
2013-08-29 17:11 ` Stéphane Marchesin
2013-08-29 19:07 ` Daniel Vetter
2013-09-04 9:28 ` Lee, Chon Ming
2013-09-04 9:41 ` Daniel Vetter
2013-09-04 9:49 ` Ville Syrjälä
2013-09-04 9:49 ` Lee, Chon Ming
2013-09-04 9:49 ` Chris Wilson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox