* [PATCH 1/4] introduce intel_ring_buffer structure
@ 2010-05-05 3:17 Zou Nan hai
2010-05-05 18:23 ` Daniel Vetter
0 siblings, 1 reply; 24+ messages in thread
From: Zou Nan hai @ 2010-05-05 3:17 UTC (permalink / raw)
To: Anholt, Eric, Intel GFX
introduce intel_ring_buffer structure.
sequential number, IRQ logic and hardware status page
were included in the intel_ring_buffer structure
Signed-off-by: Xiang Hai hao <haihao.xiang@intel.com>
Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
---
drivers/gpu/drm/i915/Makefile | 1 +
drivers/gpu/drm/i915/i915_drv.h | 7 +++++++
drivers/gpu/drm/i915/i915_irq.c | 4 ++--
include/drm/i915_drm.h | 4 +++-
4 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 9929f84..bf8d097 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -22,6 +22,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \
intel_fb.o \
intel_tv.o \
intel_dvo.o \
+ intel_ringbuffer.o \
intel_overlay.o \
dvo_ch7xxx.o \
dvo_ch7017.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 790fef3..32953ad 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -32,6 +32,7 @@
#include "i915_reg.h"
#include "intel_bios.h"
+#include "intel_ringbuffer.h"
#include <linux/io-mapping.h>
/* General customization:
@@ -838,6 +839,12 @@ extern u32 gm45_get_vblank_counter(struct drm_device *dev, int crtc);
extern int i915_vblank_swap(struct drm_device *dev, void *data,
struct drm_file *file_priv);
extern void i915_enable_irq(drm_i915_private_t *dev_priv, u32 mask);
+extern void i915_disable_irq(drm_i915_private_t *dev_priv, u32 mask);
+extern void ironlake_enable_graphics_irq(drm_i915_private_t *dev_priv,
+ u32 mask);
+extern void ironlake_disable_graphics_irq(drm_i915_private_t *dev_priv,
+ u32 mask);
+
void
i915_enable_pipestat(drm_i915_private_t *dev_priv, int pipe, u32 mask);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 7701cbd..301702a 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -73,7 +73,7 @@ ironlake_enable_graphics_irq(drm_i915_private_t *dev_priv, u32 mask)
}
}
-static inline void
+void
ironlake_disable_graphics_irq(drm_i915_private_t *dev_priv, u32 mask)
{
if ((dev_priv->gt_irq_mask_reg & mask) != mask) {
@@ -114,7 +114,7 @@ i915_enable_irq(drm_i915_private_t *dev_priv, u32 mask)
}
}
-static inline void
+void
i915_disable_irq(drm_i915_private_t *dev_priv, u32 mask)
{
if ((dev_priv->irq_mask_reg & mask) != mask) {
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index b64a8d7..e916870 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -616,7 +616,9 @@ struct drm_i915_gem_execbuffer2 {
__u32 num_cliprects;
/** This is a struct drm_clip_rect *cliprects */
__u64 cliprects_ptr;
- __u64 flags; /* currently unused */
+#define I915_EXEC_RENDER (1<<0)
+#define I915_EXEC_BSD (1<<1)
+ __u64 flags;
__u64 rsvd1;
__u64 rsvd2;
};
--
1.7.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH 1/4] introduce intel_ring_buffer structure
2010-05-05 3:17 Zou Nan hai
@ 2010-05-05 18:23 ` Daniel Vetter
2010-05-06 2:25 ` Zou, Nanhai
0 siblings, 1 reply; 24+ messages in thread
From: Daniel Vetter @ 2010-05-05 18:23 UTC (permalink / raw)
To: Zou Nan hai; +Cc: Intel GFX, Eric, Anholt
Looks like you've forgotten to actually add intel_ringbuffer.c - it's
missing form the patch ;)
Please resend.
Cheers, Daniel
--
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 1/4] introduce intel_ring_buffer structure
@ 2010-05-06 1:20 Zou Nan hai
2010-05-06 1:20 ` [PATCH 2/4] convert render engine to use intel_ring_buffer Zou Nan hai
` (4 more replies)
0 siblings, 5 replies; 24+ messages in thread
From: Zou Nan hai @ 2010-05-06 1:20 UTC (permalink / raw)
To: Anholt Eric, Intel GFX
[-- Attachment #1: Type: text/plain, Size: 23129 bytes --]
introduce intel_ring_buffer structure.
sequential number, IRQ logic and hardware status page
were included in the intel_ring_buffer structure
Signed-off-by: Xiang Hai hao <haihao.xiang@intel.com>
Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
---
drivers/gpu/drm/i915/Makefile | 1 +
drivers/gpu/drm/i915/i915_drv.h | 7 +
drivers/gpu/drm/i915/i915_irq.c | 4 +-
drivers/gpu/drm/i915/intel_ringbuffer.c | 576 +++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/intel_ringbuffer.h | 119 +++++++
include/drm/i915_drm.h | 4 +-
6 files changed, 708 insertions(+), 3 deletions(-)
create mode 100644 drivers/gpu/drm/i915/intel_ringbuffer.c
create mode 100644 drivers/gpu/drm/i915/intel_ringbuffer.h
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 9929f84..bf8d097 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -22,6 +22,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \
intel_fb.o \
intel_tv.o \
intel_dvo.o \
+ intel_ringbuffer.o \
intel_overlay.o \
dvo_ch7xxx.o \
dvo_ch7017.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 790fef3..32953ad 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -32,6 +32,7 @@
#include "i915_reg.h"
#include "intel_bios.h"
+#include "intel_ringbuffer.h"
#include <linux/io-mapping.h>
/* General customization:
@@ -838,6 +839,12 @@ extern u32 gm45_get_vblank_counter(struct drm_device *dev, int crtc);
extern int i915_vblank_swap(struct drm_device *dev, void *data,
struct drm_file *file_priv);
extern void i915_enable_irq(drm_i915_private_t *dev_priv, u32 mask);
+extern void i915_disable_irq(drm_i915_private_t *dev_priv, u32 mask);
+extern void ironlake_enable_graphics_irq(drm_i915_private_t *dev_priv,
+ u32 mask);
+extern void ironlake_disable_graphics_irq(drm_i915_private_t *dev_priv,
+ u32 mask);
+
void
i915_enable_pipestat(drm_i915_private_t *dev_priv, int pipe, u32 mask);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 7701cbd..301702a 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -73,7 +73,7 @@ ironlake_enable_graphics_irq(drm_i915_private_t *dev_priv, u32 mask)
}
}
-static inline void
+void
ironlake_disable_graphics_irq(drm_i915_private_t *dev_priv, u32 mask)
{
if ((dev_priv->gt_irq_mask_reg & mask) != mask) {
@@ -114,7 +114,7 @@ i915_enable_irq(drm_i915_private_t *dev_priv, u32 mask)
}
}
-static inline void
+void
i915_disable_irq(drm_i915_private_t *dev_priv, u32 mask)
{
if ((dev_priv->irq_mask_reg & mask) != mask) {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
new file mode 100644
index 0000000..65c540b
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -0,0 +1,576 @@
+/*
+ * Copyright © 20010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Zou Nan hai <nanhai.zou@intel.com>
+ * Xiang Hai hao<haihao.xiang@intel.com>
+ *
+ */
+#include "drmP.h"
+#include "drm.h"
+#include "i915_drv.h"
+#include "i915_drm.h"
+#include "i915_trace.h"
+
+#define I915_GEM_GPU_DOMAINS (~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
+static void
+render_ring_flush(struct drm_device *dev,
+ struct intel_ring_buffer *ring,
+ u32 invalidate_domains,
+ u32 flush_domains)
+{
+#if WATCH_EXEC
+ DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
+ invalidate_domains, flush_domains);
+#endif
+ u32 cmd;
+ trace_i915_gem_request_flush(dev, ring->next_seqno,
+ invalidate_domains, flush_domains);
+
+ if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) {
+ /*
+ * read/write caches:
+ *
+ * I915_GEM_DOMAIN_RENDER is always invalidated, but is
+ * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
+ * also flushed at 2d versus 3d pipeline switches.
+ *
+ * read-only caches:
+ *
+ * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
+ * MI_READ_FLUSH is set, and is always flushed on 965.
+ *
+ * I915_GEM_DOMAIN_COMMAND may not exist?
+ *
+ * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
+ * invalidated when MI_EXE_FLUSH is set.
+ *
+ * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
+ * invalidated with every MI_FLUSH.
+ *
+ * TLBs:
+ *
+ * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
+ * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
+ * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
+ * are flushed at any MI_FLUSH.
+ */
+
+ cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
+ if ((invalidate_domains|flush_domains) &
+ I915_GEM_DOMAIN_RENDER)
+ cmd &= ~MI_NO_WRITE_FLUSH;
+ if (!IS_I965G(dev)) {
+ /*
+ * On the 965, the sampler cache always gets flushed
+ * and this bit is reserved.
+ */
+ if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
+ cmd |= MI_READ_FLUSH;
+ }
+ if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
+ cmd |= MI_EXE_FLUSH;
+
+#if WATCH_EXEC
+ DRM_INFO("%s: queue flush %08x to ring\n", __func__, cmd);
+#endif
+ intel_ring_begin(dev, ring, 8);
+ intel_ring_emit(dev, ring, cmd);
+ intel_ring_emit(dev, ring, MI_NOOP);
+ intel_ring_advance(dev, ring);
+ }
+}
+
+static unsigned int render_ring_get_head(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ return I915_READ(PRB0_HEAD) & HEAD_ADDR;
+}
+
+static unsigned int render_ring_get_tail(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ return I915_READ(PRB0_TAIL) & TAIL_ADDR;
+}
+
+static unsigned int render_ring_get_active_head(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ u32 acthd_reg = IS_I965G(dev) ? ACTHD_I965 : ACTHD;
+
+ return I915_READ(acthd_reg);
+}
+
+static void render_ring_advance_ring(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ I915_WRITE(PRB0_TAIL, ring->tail);
+}
+
+
+static int init_render_ring(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ u32 head;
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ struct drm_i915_gem_object *obj_priv;
+ obj_priv = ring->gem_object->driver_private;
+
+ /* Stop the ring if it's running. */
+ I915_WRITE(PRB0_CTL, 0);
+ I915_WRITE(PRB0_HEAD, 0);
+ I915_WRITE(PRB0_TAIL, 0);
+
+ /* Initialize the ring. */
+ I915_WRITE(PRB0_START, obj_priv->gtt_offset);
+ head = ring->get_head(dev, ring);
+
+ /* G45 ring initialization fails to reset head to zero */
+ if (head != 0) {
+ DRM_ERROR("%s head not reset to zero "
+ "ctl %08x head %08x tail %08x start %08x\n",
+ ring->name,
+ I915_READ(PRB0_CTL),
+ I915_READ(PRB0_HEAD),
+ I915_READ(PRB0_TAIL),
+ I915_READ(PRB0_START));
+ I915_WRITE(PRB0_HEAD, 0);
+
+ DRM_ERROR("%s head forced to zero "
+ "ctl %08x head %08x tail %08x start %08x\n",
+ ring->name,
+ I915_READ(PRB0_CTL),
+ I915_READ(PRB0_HEAD),
+ I915_READ(PRB0_TAIL),
+ I915_READ(PRB0_START));
+ }
+
+ I915_WRITE(PRB0_CTL,
+ ((ring->gem_object->size - PAGE_SIZE) & RING_NR_PAGES)
+ | RING_NO_REPORT | RING_VALID);
+
+ head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
+ /* If the head is still not zero, the ring is dead */
+ if (head != 0) {
+ DRM_ERROR("%s initialization failed "
+ "ctl %08x head %08x tail %08x start %08x\n",
+ ring->name,
+ I915_READ(PRB0_CTL),
+ I915_READ(PRB0_HEAD),
+ I915_READ(PRB0_TAIL),
+ I915_READ(PRB0_START));
+ return -EIO;
+ }
+
+ return 0;
+}
+
+
+static u32
+render_ring_add_request(struct drm_device *dev,
+ struct intel_ring_buffer *ring,
+ struct drm_file *file_priv,
+ u32 flush_domains)
+{
+ u32 seqno;
+ seqno = intel_ring_get_seqno(dev, ring);
+
+ intel_ring_begin(dev, ring, 4);
+ intel_ring_emit(dev, ring, MI_STORE_DWORD_INDEX);
+ intel_ring_emit(dev, ring,
+ I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
+ intel_ring_emit(dev, ring, seqno);
+ intel_ring_emit(dev, ring, MI_USER_INTERRUPT);
+ intel_ring_advance(dev, ring);
+
+ DRM_DEBUG_DRIVER("%s %d\n", ring->name, seqno);
+
+ return seqno;
+}
+
+static u32
+render_ring_get_gem_seqno(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
+}
+
+static void
+render_ring_get_user_irq(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags);
+ if (dev->irq_enabled && (++ring->user_irq_refcount == 1)) {
+ if (IS_IRONLAKE(dev))
+ ironlake_enable_graphics_irq(dev_priv,
+ GT_USER_INTERRUPT);
+ else
+ i915_enable_irq(dev_priv, I915_USER_INTERRUPT);
+ }
+ spin_unlock_irqrestore(&dev_priv->user_irq_lock, irqflags);
+
+}
+
+static void
+render_ring_put_user_irq(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags);
+ BUG_ON(dev->irq_enabled && ring->user_irq_refcount <= 0);
+ if (dev->irq_enabled && (--ring->user_irq_refcount == 0)) {
+ if (IS_IRONLAKE(dev))
+ ironlake_disable_graphics_irq(dev_priv,
+ GT_USER_INTERRUPT);
+ else
+ i915_disable_irq(dev_priv, I915_USER_INTERRUPT);
+ }
+ spin_unlock_irqrestore(&dev_priv->user_irq_lock, irqflags);
+}
+
+static void render_setup_status_page(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ I915_WRITE(HWS_PGA, ring->status_page.gfx_addr);
+ (void)I915_READ(HWS_PGA);
+}
+
+
+static int
+render_ring_dispatch_gem_execbuffer(struct drm_device *dev,
+ struct intel_ring_buffer *ring,
+ struct drm_i915_gem_execbuffer2 *exec,
+ struct drm_clip_rect *cliprects,
+ uint64_t exec_offset)
+{
+ int nbox = exec->num_cliprects;
+ int i = 0, count;
+ uint32_t exec_start, exec_len;
+ exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
+ exec_len = (uint32_t) exec->batch_len;
+
+ count = nbox ? nbox : 1;
+
+ for (i = 0; i < count; i++) {
+ if (i < nbox) {
+ int ret = i915_emit_box(dev, cliprects, i,
+ exec->DR1, exec->DR4);
+ if (ret)
+ return ret;
+ }
+
+ if (IS_I830(dev) || IS_845G(dev)) {
+ intel_ring_begin(dev, ring, 4);
+ intel_ring_emit(dev, ring, MI_BATCH_BUFFER);
+ intel_ring_emit(dev, ring, exec_start | MI_BATCH_NON_SECURE);
+ intel_ring_emit(dev, ring, exec_start + exec_len - 4);
+ intel_ring_emit(dev, ring, 0);
+ } else {
+ intel_ring_begin(dev, ring, 4);
+ if (IS_I965G(dev)) {
+ intel_ring_emit(dev, ring, MI_BATCH_BUFFER_START |
+ (2 << 6) | MI_BATCH_NON_SECURE_I965);
+ intel_ring_emit(dev, ring, exec_start);
+ } else {
+ intel_ring_emit(dev, ring, MI_BATCH_BUFFER_START |
+ (2 << 6));
+ intel_ring_emit(dev, ring, exec_start | MI_BATCH_NON_SECURE);
+ }
+ }
+ intel_ring_advance(dev, ring);
+ }
+
+ /* XXX breadcrumb */
+ return 0;
+}
+
+static void cleanup_status_page(struct drm_device *dev, struct intel_ring_buffer *ring)
+{
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ struct drm_gem_object *obj;
+ struct drm_i915_gem_object *obj_priv;
+
+ obj = ring->status_page.obj;
+ if (obj == NULL)
+ return;
+ obj_priv = obj->driver_private;
+
+ kunmap(obj_priv->pages[0]);
+ i915_gem_object_unpin(obj);
+ drm_gem_object_unreference(obj);
+ ring->status_page.obj = NULL;
+
+ memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
+}
+static int init_status_page(struct drm_device *dev, struct intel_ring_buffer *ring)
+{
+ struct drm_i915_gem_object *obj_priv;
+ struct drm_gem_object *obj;
+ int ret;
+ drm_i915_private_t *dev_priv = dev->dev_private;
+
+ obj = drm_gem_object_alloc(dev, PAGE_SIZE*10);
+ if (obj == NULL)
+ return -ENOMEM;
+
+ obj_priv = obj->driver_private;
+ obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
+
+ ret = i915_gem_object_pin(obj, PAGE_SIZE);
+ if (ret != 0) {
+ drm_gem_object_unreference(obj);
+ return ret;
+ }
+
+ ring->status_page.gfx_addr = obj_priv->gtt_offset;
+
+ ring->status_page.page_addr = kmap(obj_priv->pages[0]);
+ if (ring->status_page.page_addr == NULL) {
+ memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
+ i915_gem_object_unpin(obj);
+ drm_gem_object_unreference(obj);
+ return -EINVAL;
+ }
+ ring->status_page.obj = obj;
+ memset(ring->status_page.page_addr, 0, PAGE_SIZE);
+ ring->setup_status_page(dev, ring);
+ DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", ring->name, ring->status_page.gfx_addr);
+ return 0;
+}
+
+
+int intel_init_ring_buffer(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ int ret;
+ struct drm_i915_gem_object *obj_priv;
+ struct drm_gem_object *obj;
+ ring->dev = dev;
+
+ if (I915_NEED_GFX_HWS(dev)) {
+ ret = init_status_page(dev, ring);
+ if (ret)
+ return ret;
+ }
+
+ obj = drm_gem_object_alloc(dev, ring->size);
+ if (obj == NULL) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ ring->gem_object = obj;
+
+ ret = i915_gem_object_pin(obj, ring->alignment);
+ if (ret != 0) {
+ drm_gem_object_unreference(obj);
+ goto cleanup;
+ }
+
+ obj_priv = obj->driver_private;
+ ring->map.size = ring->size;
+ ring->map.offset = dev->agp->base + obj_priv->gtt_offset;
+ ring->map.type = 0;
+ ring->map.flags = 0;
+ ring->map.mtrr = 0;
+
+ drm_core_ioremap_wc(&ring->map, dev);
+
+ if (ring->map.handle == NULL) {
+ i915_gem_object_unpin(obj);
+ drm_gem_object_unreference(obj);
+ ret = -EINVAL;
+ goto cleanup;
+ }
+
+ ring->virtual_start = ring->map.handle;
+ ret = ring->init(dev, ring);
+ if (ret != 0) {
+ intel_cleanup_ring_buffer(dev, ring);
+ return ret;
+ }
+
+ ring->head = ring->get_head(dev, ring);
+ ring->tail = ring->get_tail(dev, ring);
+ ring->space = ring->head - (ring->tail + 8);
+ if (ring->space < 0)
+ ring->space += ring->size;
+ INIT_LIST_HEAD(&ring->active_list);
+ INIT_LIST_HEAD(&ring->request_list);
+ return ret;
+cleanup:
+ cleanup_status_page(dev, ring);
+ return ret;
+}
+
+void intel_cleanup_ring_buffer(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ if (ring->gem_object == NULL)
+ return;
+
+ drm_core_ioremapfree(&ring->map, dev);
+
+ i915_gem_object_unpin(ring->gem_object);
+ drm_gem_object_unreference(ring->gem_object);
+ ring->gem_object = NULL;
+ cleanup_status_page(dev, ring);
+}
+
+int intel_wrap_ring_buffer(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ unsigned int *virt;
+ int rem;
+ rem = ring->size - ring->tail;
+
+ if (ring->space < rem) {
+ int ret = intel_wait_ring_buffer(dev, ring, rem);
+ if (ret)
+ return ret;
+ }
+
+ virt = (unsigned int *)(ring->virtual_start + ring->tail);
+ rem /= 4;
+ while (rem--)
+ *virt++ = MI_NOOP;
+
+ ring->tail = 0;
+
+ return 0;
+}
+
+int intel_wait_ring_buffer(struct drm_device *dev,
+ struct intel_ring_buffer *ring, int n)
+{
+ unsigned long end;
+
+ trace_i915_ring_wait_begin (dev);
+ end = jiffies + 3 * HZ;
+ do {
+ ring->head = ring->get_head(dev, ring);
+ ring->space = ring->head - (ring->tail + 8);
+ if (ring->space < 0)
+ ring->space += ring->size;
+ if (ring->space >= n) {
+ trace_i915_ring_wait_end (dev);
+ return 0;
+ }
+ yield();
+ } while(!time_after(jiffies, end));
+ trace_i915_ring_wait_end (dev);
+
+ return -EBUSY;
+}
+
+void intel_ring_begin(struct drm_device *dev,
+ struct intel_ring_buffer *ring, int n)
+{
+ if (unlikely(ring->tail + n > ring->size))
+ intel_wrap_ring_buffer(dev, ring);
+ if (unlikely(ring->space < n))
+ intel_wait_ring_buffer(dev, ring, n);
+}
+
+void intel_ring_emit(struct drm_device *dev,
+ struct intel_ring_buffer *ring, unsigned int data)
+{
+ unsigned int *virt = ring->virtual_start + ring->tail;
+ *virt = data;
+ ring->tail += 4;
+ ring->tail &= ring->size - 1;
+ ring->space -= 4;
+}
+
+void intel_ring_advance(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ ring->advance_ring(dev, ring);
+}
+
+void intel_fill_struct(struct drm_device *dev,
+ struct intel_ring_buffer *ring,
+ void *data,
+ unsigned int len)
+{
+ unsigned int *virt = ring->virtual_start + ring->tail;
+ BUG_ON((len&~(4-1)) != 0);
+ intel_ring_begin (dev, ring, len);
+ memcpy(virt, data, len);
+ ring->tail += len;
+ ring->tail &= ring->size - 1;
+ ring->space -= len;
+ intel_ring_advance (dev, ring);
+}
+
+u32 intel_ring_get_seqno(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ u32 seqno;
+ seqno = ring->next_seqno;
+
+ /* reserve 0 for non-seqno */
+ if (++ring->next_seqno == 0)
+ ring->next_seqno = 1;
+ return seqno;
+}
+
+struct intel_ring_buffer render_ring = {
+ .name = "render ring",
+ .ring_flag = I915_EXEC_RENDER,
+ .size = 32 * PAGE_SIZE,
+ .alignment = PAGE_SIZE,
+ .virtual_start = NULL,
+ .dev = NULL,
+ .gem_object = NULL,
+ .head = 0,
+ .tail = 0,
+ .space = 0,
+ .next_seqno = 1,
+ .user_irq_refcount = 0,
+ .irq_gem_seqno = 0,
+ .waiting_gem_seqno = 0,
+ .setup_status_page = render_setup_status_page,
+ .init = init_render_ring,
+ .get_head = render_ring_get_head,
+ .get_tail = render_ring_get_tail,
+ .get_active_head = render_ring_get_active_head,
+ .advance_ring = render_ring_advance_ring,
+ .flush = render_ring_flush,
+ .add_request = render_ring_add_request,
+ .get_gem_seqno = render_ring_get_gem_seqno,
+ .user_irq_get = render_ring_get_user_irq,
+ .user_irq_put = render_ring_put_user_irq,
+ .dispatch_gem_execbuffer = render_ring_dispatch_gem_execbuffer,
+ .status_page = {NULL, 0, NULL},
+ .map = {0,}
+};
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
new file mode 100644
index 0000000..e3b40d1
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -0,0 +1,119 @@
+#ifndef _INTEL_RINGBUFFER_H_
+#define _INTEL_RINGBUFFER_H_
+
+struct intel_hw_status_page {
+ void *page_addr;
+ unsigned int gfx_addr;
+ struct drm_gem_object *obj;
+};
+
+struct drm_i915_gem_execbuffer2;
+struct intel_ring_buffer {
+ const char *name;
+ unsigned int ring_flag;
+ unsigned long size;
+ unsigned int alignment;
+ void *virtual_start;
+ struct drm_device *dev;
+ struct drm_gem_object *gem_object;
+
+ unsigned int head;
+ unsigned int tail;
+ unsigned int space;
+ u32 next_seqno;
+ struct intel_hw_status_page status_page;
+
+ u32 irq_gem_seqno; /* last seq seem at irq time */
+ u32 waiting_gem_seqno;
+ int user_irq_refcount;
+ void (*user_irq_get)(struct drm_device *dev,
+ struct intel_ring_buffer *ring);
+ void (*user_irq_put)(struct drm_device *dev,
+ struct intel_ring_buffer *ring);
+ void (*setup_status_page)(struct drm_device *dev,
+ struct intel_ring_buffer *ring);
+
+ int (*init)(struct drm_device *dev,
+ struct intel_ring_buffer *ring);
+
+ unsigned int (*get_head)(struct drm_device *dev,
+ struct intel_ring_buffer *ring);
+ unsigned int (*get_tail)(struct drm_device *dev,
+ struct intel_ring_buffer *ring);
+ unsigned int (*get_active_head)(struct drm_device *dev,
+ struct intel_ring_buffer *ring);
+ void (*advance_ring)(struct drm_device *dev,
+ struct intel_ring_buffer *ring);
+ void (*flush)(struct drm_device *dev,
+ struct intel_ring_buffer *ring,
+ u32 invalidate_domains,
+ u32 flush_domains);
+ u32 (*add_request)(struct drm_device *dev,
+ struct intel_ring_buffer *ring,
+ struct drm_file *file_priv,
+ u32 flush_domains);
+ u32 (*get_gem_seqno)(struct drm_device *dev,
+ struct intel_ring_buffer *ring);
+ int (*dispatch_gem_execbuffer)(struct drm_device *dev,
+ struct intel_ring_buffer *ring,
+ struct drm_i915_gem_execbuffer2 *exec,
+ struct drm_clip_rect *cliprects,
+ uint64_t exec_offset);
+
+ /**
+ * List of objects currently involved in rendering from the
+ * ringbuffer.
+ *
+ * Includes buffers having the contents of their GPU caches
+ * flushed, not necessarily primitives. last_rendering_seqno
+ * represents when the rendering involved will be completed.
+ *
+ * A reference is held on the buffer while on this list.
+ */
+ struct list_head active_list;
+
+ /**
+ * List of breadcrumbs associated with GPU requests currently
+ * outstanding.
+ */
+ struct list_head request_list;
+
+ wait_queue_head_t irq_queue;
+ drm_local_map_t map;
+};
+
+static inline u32
+intel_read_status_page(struct intel_ring_buffer *ring,
+ int reg)
+{
+ u32 *regs = ring->status_page.page_addr;
+ return regs[reg];
+}
+
+int intel_init_ring_buffer(struct drm_device *dev,
+ struct intel_ring_buffer *ring);
+void intel_cleanup_ring_buffer(struct drm_device *dev,
+ struct intel_ring_buffer *ring);
+int intel_wait_ring_buffer(struct drm_device *dev,
+ struct intel_ring_buffer *ring, int n);
+int intel_wrap_ring_buffer(struct drm_device *dev,
+ struct intel_ring_buffer *ring);
+void intel_ring_begin(struct drm_device *dev,
+ struct intel_ring_buffer *ring, int n);
+void intel_ring_emit(struct drm_device *dev,
+ struct intel_ring_buffer *ring, u32 data);
+void intel_fill_struct(struct drm_device *dev,
+ struct intel_ring_buffer *ring,
+ void *data,
+ unsigned int len);
+void intel_ring_advance(struct drm_device *dev,
+ struct intel_ring_buffer *ring);
+
+u32 intel_ring_get_seqno(struct drm_device *dev,
+ struct intel_ring_buffer *ring);
+
+extern struct intel_ring_buffer render_ring;
+extern struct intel_ring_buffer bsd_ring;
+
+#endif /* _INTEL_RINGBUFFER_H_ */
+
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index b64a8d7..e916870 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -616,7 +616,9 @@ struct drm_i915_gem_execbuffer2 {
__u32 num_cliprects;
/** This is a struct drm_clip_rect *cliprects */
__u64 cliprects_ptr;
- __u64 flags; /* currently unused */
+#define I915_EXEC_RENDER (1<<0)
+#define I915_EXEC_BSD (1<<1)
+ __u64 flags;
__u64 rsvd1;
__u64 rsvd2;
};
--
1.7.1
[-- Attachment #2: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH 2/4] convert render engine to use intel_ring_buffer
2010-05-06 1:20 [PATCH 1/4] introduce intel_ring_buffer structure Zou Nan hai
@ 2010-05-06 1:20 ` Zou Nan hai
2010-05-06 1:20 ` [PATCH 3/4] add BSD ring buffer support Zou Nan hai
` (3 subsequent siblings)
4 siblings, 0 replies; 24+ messages in thread
From: Zou Nan hai @ 2010-05-06 1:20 UTC (permalink / raw)
To: Anholt Eric, Intel GFX
convert ring buffer used by render engine to intel_ring_buffer
structure.
Signed-off-by: Xiang Hai hao <haihao.xiang@intel.com>
Signed-off-by: Zou Nan hai <nanhai.zou@@intel.com>
---
drivers/gpu/drm/i915/i915_debugfs.c | 14 +-
drivers/gpu/drm/i915/i915_dma.c | 159 ++++++-----------------
drivers/gpu/drm/i915/i915_drv.c | 27 +----
drivers/gpu/drm/i915/i915_drv.h | 35 ++---
drivers/gpu/drm/i915/i915_gem.c | 214 ++-----------------------------
drivers/gpu/drm/i915/i915_irq.c | 16 ++-
drivers/gpu/drm/i915/intel_display.c | 1 -
drivers/gpu/drm/i915/intel_overlay.c | 9 --
drivers/gpu/drm/i915/intel_ringbuffer.c | 24 +++-
9 files changed, 101 insertions(+), 398 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index bb3a4a8..ea81ec1 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -143,7 +143,7 @@ static int i915_gem_seqno_info(struct seq_file *m, void *data)
struct drm_device *dev = node->minor->dev;
drm_i915_private_t *dev_priv = dev->dev_private;
- if (dev_priv->hw_status_page != NULL) {
+ if (dev_priv->render_ring.status_page.page_addr != NULL) {
seq_printf(m, "Current sequence: %d\n",
i915_get_gem_seqno(dev));
} else {
@@ -195,7 +195,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
}
seq_printf(m, "Interrupts received: %d\n",
atomic_read(&dev_priv->irq_received));
- if (dev_priv->hw_status_page != NULL) {
+ if (dev_priv->render_ring.status_page.page_addr != NULL) {
seq_printf(m, "Current sequence: %d\n",
i915_get_gem_seqno(dev));
} else {
@@ -251,7 +251,7 @@ static int i915_hws_info(struct seq_file *m, void *data)
int i;
volatile u32 *hws;
- hws = (volatile u32 *)dev_priv->hw_status_page;
+ hws = (volatile u32 *)dev_priv->render_ring.status_page.page_addr;
if (hws == NULL)
return 0;
@@ -317,14 +317,14 @@ static int i915_ringbuffer_data(struct seq_file *m, void *data)
u8 *virt;
uint32_t *ptr, off;
- if (!dev_priv->ring.ring_obj) {
+ if (!dev_priv->render_ring.gem_object) {
seq_printf(m, "No ringbuffer setup\n");
return 0;
}
- virt = dev_priv->ring.virtual_start;
+ virt = dev_priv->render_ring.virtual_start;
- for (off = 0; off < dev_priv->ring.Size; off += 4) {
+ for (off = 0; off < dev_priv->render_ring.size; off += 4) {
ptr = (uint32_t *)(virt + off);
seq_printf(m, "%08x : %08x\n", off, *ptr);
}
@@ -344,7 +344,7 @@ static int i915_ringbuffer_info(struct seq_file *m, void *data)
seq_printf(m, "RingHead : %08x\n", head);
seq_printf(m, "RingTail : %08x\n", tail);
- seq_printf(m, "RingSize : %08lx\n", dev_priv->ring.Size);
+ seq_printf(m, "RingSize : %08lx\n", dev_priv->render_ring.size);
seq_printf(m, "Acthd : %08x\n", I915_READ(IS_I965G(dev) ? ACTHD_I965 : ACTHD));
return 0;
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 75248be..88ec88c 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -39,84 +39,6 @@
#include <linux/pnp.h>
#include <linux/vga_switcheroo.h>
-/* Really want an OS-independent resettable timer. Would like to have
- * this loop run for (eg) 3 sec, but have the timer reset every time
- * the head pointer changes, so that EBUSY only happens if the ring
- * actually stalls for (eg) 3 seconds.
- */
-int i915_wait_ring(struct drm_device * dev, int n, const char *caller)
-{
- drm_i915_private_t *dev_priv = dev->dev_private;
- drm_i915_ring_buffer_t *ring = &(dev_priv->ring);
- u32 acthd_reg = IS_I965G(dev) ? ACTHD_I965 : ACTHD;
- u32 last_acthd = I915_READ(acthd_reg);
- u32 acthd;
- u32 last_head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
- int i;
-
- trace_i915_ring_wait_begin (dev);
-
- for (i = 0; i < 100000; i++) {
- ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
- acthd = I915_READ(acthd_reg);
- ring->space = ring->head - (ring->tail + 8);
- if (ring->space < 0)
- ring->space += ring->Size;
- if (ring->space >= n) {
- trace_i915_ring_wait_end (dev);
- return 0;
- }
-
- if (dev->primary->master) {
- struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
- if (master_priv->sarea_priv)
- master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
- }
-
-
- if (ring->head != last_head)
- i = 0;
- if (acthd != last_acthd)
- i = 0;
-
- last_head = ring->head;
- last_acthd = acthd;
- msleep_interruptible(10);
-
- }
-
- trace_i915_ring_wait_end (dev);
- return -EBUSY;
-}
-
-/* As a ringbuffer is only allowed to wrap between instructions, fill
- * the tail with NOOPs.
- */
-int i915_wrap_ring(struct drm_device *dev)
-{
- drm_i915_private_t *dev_priv = dev->dev_private;
- volatile unsigned int *virt;
- int rem;
-
- rem = dev_priv->ring.Size - dev_priv->ring.tail;
- if (dev_priv->ring.space < rem) {
- int ret = i915_wait_ring(dev, rem, __func__);
- if (ret)
- return ret;
- }
- dev_priv->ring.space -= rem;
-
- virt = (unsigned int *)
- (dev_priv->ring.virtual_start + dev_priv->ring.tail);
- rem /= 4;
- while (rem--)
- *virt++ = MI_NOOP;
-
- dev_priv->ring.tail = 0;
-
- return 0;
-}
-
/**
* Sets up the hardware status page for devices that need a physical address
* in the register.
@@ -132,14 +54,15 @@ static int i915_init_phys_hws(struct drm_device *dev)
DRM_ERROR("Can not allocate hardware status page\n");
return -ENOMEM;
}
- dev_priv->hw_status_page = dev_priv->status_page_dmah->vaddr;
+ dev_priv->render_ring.status_page.page_addr
+ = dev_priv->status_page_dmah->vaddr;
dev_priv->dma_status_page = dev_priv->status_page_dmah->busaddr;
- memset(dev_priv->hw_status_page, 0, PAGE_SIZE);
+ memset(dev_priv->render_ring.status_page.page_addr, 0, PAGE_SIZE);
if (IS_I965G(dev))
dev_priv->dma_status_page |= (dev_priv->dma_status_page >> 28) &
- 0xf0;
+ 0xf0;
I915_WRITE(HWS_PGA, dev_priv->dma_status_page);
DRM_DEBUG_DRIVER("Enabled hardware status page\n");
@@ -171,7 +94,7 @@ void i915_kernel_lost_context(struct drm_device * dev)
{
drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_i915_master_private *master_priv;
- drm_i915_ring_buffer_t *ring = &(dev_priv->ring);
+ struct intel_ring_buffer *ring = &(dev_priv->render_ring);
/*
* We should never lose context on the ring with modesetting
@@ -184,7 +107,7 @@ void i915_kernel_lost_context(struct drm_device * dev)
ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR;
ring->space = ring->head - (ring->tail + 8);
if (ring->space < 0)
- ring->space += ring->Size;
+ ring->space += ring->size;
if (!dev->primary->master)
return;
@@ -204,12 +127,7 @@ static int i915_dma_cleanup(struct drm_device * dev)
if (dev->irq_enabled)
drm_irq_uninstall(dev);
- if (dev_priv->ring.virtual_start) {
- drm_core_ioremapfree(&dev_priv->ring.map, dev);
- dev_priv->ring.virtual_start = NULL;
- dev_priv->ring.map.handle = NULL;
- dev_priv->ring.map.size = 0;
- }
+ intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
/* Clear the HWS virtual address at teardown */
if (I915_NEED_GFX_HWS(dev))
@@ -232,24 +150,24 @@ static int i915_initialize(struct drm_device * dev, drm_i915_init_t * init)
}
if (init->ring_size != 0) {
- if (dev_priv->ring.ring_obj != NULL) {
+ if (dev_priv->render_ring.gem_object != NULL) {
i915_dma_cleanup(dev);
DRM_ERROR("Client tried to initialize ringbuffer in "
"GEM mode\n");
return -EINVAL;
}
- dev_priv->ring.Size = init->ring_size;
+ dev_priv->render_ring.size = init->ring_size;
- dev_priv->ring.map.offset = init->ring_start;
- dev_priv->ring.map.size = init->ring_size;
- dev_priv->ring.map.type = 0;
- dev_priv->ring.map.flags = 0;
- dev_priv->ring.map.mtrr = 0;
+ dev_priv->render_ring.map.offset = init->ring_start;
+ dev_priv->render_ring.map.size = init->ring_size;
+ dev_priv->render_ring.map.type = 0;
+ dev_priv->render_ring.map.flags = 0;
+ dev_priv->render_ring.map.mtrr = 0;
- drm_core_ioremap_wc(&dev_priv->ring.map, dev);
+ drm_core_ioremap_wc(&dev_priv->render_ring.map, dev);
- if (dev_priv->ring.map.handle == NULL) {
+ if (dev_priv->render_ring.map.handle == NULL) {
i915_dma_cleanup(dev);
DRM_ERROR("can not ioremap virtual address for"
" ring buffer\n");
@@ -257,7 +175,7 @@ static int i915_initialize(struct drm_device * dev, drm_i915_init_t * init)
}
}
- dev_priv->ring.virtual_start = dev_priv->ring.map.handle;
+ dev_priv->render_ring.virtual_start = dev_priv->render_ring.map.handle;
dev_priv->cpp = init->cpp;
dev_priv->back_offset = init->back_offset;
@@ -277,26 +195,29 @@ static int i915_dma_resume(struct drm_device * dev)
{
drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+ struct intel_ring_buffer *ring;
DRM_DEBUG_DRIVER("%s\n", __func__);
- if (dev_priv->ring.map.handle == NULL) {
+ ring = &dev_priv->render_ring;
+
+ if (ring->map.handle == NULL) {
DRM_ERROR("can not ioremap virtual address for"
" ring buffer\n");
return -ENOMEM;
}
/* Program Hardware Status Page */
- if (!dev_priv->hw_status_page) {
+ if (!ring->status_page.page_addr) {
DRM_ERROR("Can not find hardware status page\n");
return -EINVAL;
}
DRM_DEBUG_DRIVER("hw status page @ %p\n",
- dev_priv->hw_status_page);
-
- if (dev_priv->status_gfx_addr != 0)
- I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr);
+ ring->status_page.page_addr);
+ if (ring->status_page.gfx_addr != 0)
+ ring->setup_status_page(dev, ring);
else
I915_WRITE(HWS_PGA, dev_priv->dma_status_page);
+
DRM_DEBUG_DRIVER("Enabled hardware status page\n");
return 0;
@@ -406,9 +327,8 @@ static int i915_emit_cmds(struct drm_device * dev, int *buffer, int dwords)
{
drm_i915_private_t *dev_priv = dev->dev_private;
int i;
- RING_LOCALS;
- if ((dwords+1) * sizeof(int) >= dev_priv->ring.Size - 8)
+ if ((dwords+1) * sizeof(int) >= dev_priv->render_ring.size - 8)
return -EINVAL;
BEGIN_LP_RING((dwords+1)&~1);
@@ -441,9 +361,7 @@ i915_emit_box(struct drm_device *dev,
struct drm_clip_rect *boxes,
int i, int DR1, int DR4)
{
- drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_clip_rect box = boxes[i];
- RING_LOCALS;
if (box.y2 <= box.y1 || box.x2 <= box.x1 || box.y2 <= 0 || box.x2 <= 0) {
DRM_ERROR("Bad box %d,%d..%d,%d\n",
@@ -480,7 +398,6 @@ static void i915_emit_breadcrumb(struct drm_device *dev)
{
drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
- RING_LOCALS;
dev_priv->counter++;
if (dev_priv->counter > 0x7FFFFFFFUL)
@@ -534,10 +451,8 @@ static int i915_dispatch_batchbuffer(struct drm_device * dev,
drm_i915_batchbuffer_t * batch,
struct drm_clip_rect *cliprects)
{
- drm_i915_private_t *dev_priv = dev->dev_private;
int nbox = batch->num_cliprects;
int i = 0, count;
- RING_LOCALS;
if ((batch->start | batch->used) & 0x7) {
DRM_ERROR("alignment");
@@ -586,7 +501,6 @@ static int i915_dispatch_flip(struct drm_device * dev)
drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_i915_master_private *master_priv =
dev->primary->master->driver_priv;
- RING_LOCALS;
if (!master_priv->sarea_priv)
return -EINVAL;
@@ -639,7 +553,8 @@ static int i915_quiescent(struct drm_device * dev)
drm_i915_private_t *dev_priv = dev->dev_private;
i915_kernel_lost_context(dev);
- return i915_wait_ring(dev, dev_priv->ring.Size - 8, __func__);
+ return intel_wait_ring_buffer(dev, &dev_priv->render_ring,
+ dev_priv->render_ring.size - 8);
}
static int i915_flush_ioctl(struct drm_device *dev, void *data,
@@ -881,10 +796,13 @@ static int i915_set_status_page(struct drm_device *dev, void *data,
{
drm_i915_private_t *dev_priv = dev->dev_private;
drm_i915_hws_addr_t *hws = data;
+ struct intel_ring_buffer *ring;
if (!I915_NEED_GFX_HWS(dev))
return -EINVAL;
+ ring = &dev_priv->render_ring;
+
if (!dev_priv) {
DRM_ERROR("called with no initialization\n");
return -EINVAL;
@@ -897,7 +815,7 @@ static int i915_set_status_page(struct drm_device *dev, void *data,
DRM_DEBUG_DRIVER("set status page addr 0x%08x\n", (u32)hws->addr);
- dev_priv->status_gfx_addr = hws->addr & (0x1ffff<<12);
+ ring->status_page.gfx_addr = hws->addr & (0x1ffff<<12);
dev_priv->hws_map.offset = dev->agp->base + hws->addr;
dev_priv->hws_map.size = 4*1024;
@@ -913,14 +831,13 @@ static int i915_set_status_page(struct drm_device *dev, void *data,
" G33 hw status page\n");
return -ENOMEM;
}
- dev_priv->hw_status_page = dev_priv->hws_map.handle;
-
- memset(dev_priv->hw_status_page, 0, PAGE_SIZE);
- I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr);
+ ring->status_page.page_addr = dev_priv->hws_map.handle;
+ memset(ring->status_page.page_addr, 0, PAGE_SIZE);
+ I915_WRITE(HWS_PGA, ring->status_page.gfx_addr);
DRM_DEBUG_DRIVER("load hws HWS_PGA with gfx mem 0x%x\n",
- dev_priv->status_gfx_addr);
+ ring->status_page.gfx_addr);
DRM_DEBUG_DRIVER("load hws at %p\n",
- dev_priv->hw_status_page);
+ ring->status_page.page_addr);
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 01e91ea..be0f31b 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -388,32 +388,9 @@ int i965_reset(struct drm_device *dev, u8 flags)
*/
if (drm_core_check_feature(dev, DRIVER_MODESET) ||
!dev_priv->mm.suspended) {
- drm_i915_ring_buffer_t *ring = &dev_priv->ring;
- struct drm_gem_object *obj = ring->ring_obj;
- struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
+ struct intel_ring_buffer *ring = &dev_priv->render_ring;
dev_priv->mm.suspended = 0;
-
- /* Stop the ring if it's running. */
- I915_WRITE(PRB0_CTL, 0);
- I915_WRITE(PRB0_TAIL, 0);
- I915_WRITE(PRB0_HEAD, 0);
-
- /* Initialize the ring. */
- I915_WRITE(PRB0_START, obj_priv->gtt_offset);
- I915_WRITE(PRB0_CTL,
- ((obj->size - 4096) & RING_NR_PAGES) |
- RING_NO_REPORT |
- RING_VALID);
- if (!drm_core_check_feature(dev, DRIVER_MODESET))
- i915_kernel_lost_context(dev);
- else {
- ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
- ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR;
- ring->space = ring->head - (ring->tail + 8);
- if (ring->space < 0)
- ring->space += ring->Size;
- }
-
+ ring->init(dev, ring);
mutex_unlock(&dev->struct_mutex);
drm_irq_uninstall(dev);
drm_irq_install(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 32953ad..83d6e9b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -237,10 +237,10 @@ typedef struct drm_i915_private {
void __iomem *regs;
struct pci_dev *bridge_dev;
- drm_i915_ring_buffer_t ring;
+ struct intel_ring_buffer render_ring;
+
drm_dma_handle_t *status_page_dmah;
- void *hw_status_page;
dma_addr_t dma_status_page;
uint32_t counter;
unsigned int status_gfx_addr;
@@ -1017,7 +1017,8 @@ extern int intel_trans_dp_port_sel (struct drm_crtc *crtc);
* has access to the ring.
*/
#define RING_LOCK_TEST_WITH_RETURN(dev, file_priv) do { \
- if (((drm_i915_private_t *)dev->dev_private)->ring.ring_obj == NULL) \
+ if (((drm_i915_private_t *)dev->dev_private)->render_ring.gem_object \
+ == NULL) \
LOCK_TEST_WITH_RETURN(dev, file_priv); \
} while (0)
@@ -1036,29 +1037,18 @@ extern int intel_trans_dp_port_sel (struct drm_crtc *crtc);
#define RING_LOCALS volatile unsigned int *ring_virt__;
#define BEGIN_LP_RING(n) do { \
- int bytes__ = 4*(n); \
- if (I915_VERBOSE) DRM_DEBUG("BEGIN_LP_RING(%d)\n", (n)); \
- /* a wrap must occur between instructions so pad beforehand */ \
- if (unlikely (dev_priv->ring.tail + bytes__ > dev_priv->ring.Size)) \
- i915_wrap_ring(dev); \
- if (unlikely (dev_priv->ring.space < bytes__)) \
- i915_wait_ring(dev, bytes__, __func__); \
- ring_virt__ = (unsigned int *) \
- (dev_priv->ring.virtual_start + dev_priv->ring.tail); \
- dev_priv->ring.tail += bytes__; \
- dev_priv->ring.tail &= dev_priv->ring.Size - 1; \
- dev_priv->ring.space -= bytes__; \
+ drm_i915_private_t *dev_priv = dev->dev_private; \
+ intel_ring_begin(dev, &dev_priv->render_ring, 4*(n)); \
} while (0)
-#define OUT_RING(n) do { \
- if (I915_VERBOSE) DRM_DEBUG(" OUT_RING %x\n", (int)(n)); \
- *ring_virt__++ = (n); \
+#define OUT_RING(x) do { \
+ drm_i915_private_t *dev_priv = dev->dev_private; \
+ intel_ring_emit(dev, &dev_priv->render_ring, x); \
} while (0)
#define ADVANCE_LP_RING() do { \
- if (I915_VERBOSE) \
- DRM_DEBUG("ADVANCE_LP_RING %x\n", dev_priv->ring.tail); \
- I915_WRITE(PRB0_TAIL, dev_priv->ring.tail); \
+ drm_i915_private_t *dev_priv = dev->dev_private; \
+ intel_ring_advance(dev, &dev_priv->render_ring); \
} while(0)
/**
@@ -1076,7 +1066,8 @@ extern int intel_trans_dp_port_sel (struct drm_crtc *crtc);
*
* The area from dword 0x20 to 0x3ff is available for driver usage.
*/
-#define READ_HWSP(dev_priv, reg) (((volatile u32*)(dev_priv->hw_status_page))[reg])
+#define READ_HWSP(dev_priv, reg) (((volatile u32 *)\
+ (dev_priv->render_ring.status_page.page_addr))[reg])
#define READ_BREADCRUMB(dev_priv) READ_HWSP(dev_priv, I915_BREADCRUMB_INDEX)
#define I915_GEM_HWS_INDEX 0x20
#define I915_BREADCRUMB_INDEX 0x21
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b85727c..7b8fa88 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1604,7 +1604,6 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
struct drm_i915_gem_request *request;
uint32_t seqno;
int was_empty;
- RING_LOCALS;
if (file_priv != NULL)
i915_file_priv = file_priv->driver_priv;
@@ -1665,10 +1664,8 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
static uint32_t
i915_retire_commands(struct drm_device *dev)
{
- drm_i915_private_t *dev_priv = dev->dev_private;
uint32_t cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
uint32_t flush_domains = 0;
- RING_LOCALS;
/* The sampler always gets flushed on i965 (sigh) */
if (IS_I965G(dev))
@@ -1763,7 +1760,8 @@ i915_gem_retire_requests(struct drm_device *dev)
drm_i915_private_t *dev_priv = dev->dev_private;
uint32_t seqno;
- if (!dev_priv->hw_status_page || list_empty(&dev_priv->mm.request_list))
+ if (!dev_priv->render_ring.status_page.page_addr
+ || list_empty(&dev_priv->mm.request_list))
return;
seqno = i915_get_gem_seqno(dev);
@@ -1890,7 +1888,6 @@ i915_gem_flush(struct drm_device *dev,
{
drm_i915_private_t *dev_priv = dev->dev_private;
uint32_t cmd;
- RING_LOCALS;
#if WATCH_EXEC
DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
@@ -3497,7 +3494,6 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev,
int nbox = exec->num_cliprects;
int i = 0, count;
uint32_t exec_start, exec_len;
- RING_LOCALS;
exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
exec_len = (uint32_t) exec->batch_len;
@@ -4507,7 +4503,8 @@ i915_gem_idle(struct drm_device *dev)
mutex_lock(&dev->struct_mutex);
- if (dev_priv->mm.suspended || dev_priv->ring.ring_obj == NULL) {
+ if (dev_priv->mm.suspended ||
+ dev_priv->render_ring.gem_object == NULL) {
mutex_unlock(&dev->struct_mutex);
return 0;
}
@@ -4545,213 +4542,28 @@ i915_gem_idle(struct drm_device *dev)
return 0;
}
-static int
-i915_gem_init_hws(struct drm_device *dev)
-{
- drm_i915_private_t *dev_priv = dev->dev_private;
- struct drm_gem_object *obj;
- struct drm_i915_gem_object *obj_priv;
- int ret;
-
- /* If we need a physical address for the status page, it's already
- * initialized at driver load time.
- */
- if (!I915_NEED_GFX_HWS(dev))
- return 0;
-
- obj = drm_gem_object_alloc(dev, 4096);
- if (obj == NULL) {
- DRM_ERROR("Failed to allocate status page\n");
- return -ENOMEM;
- }
- obj_priv = to_intel_bo(obj);
- obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
-
- ret = i915_gem_object_pin(obj, 4096);
- if (ret != 0) {
- drm_gem_object_unreference(obj);
- return ret;
- }
-
- dev_priv->status_gfx_addr = obj_priv->gtt_offset;
-
- dev_priv->hw_status_page = kmap(obj_priv->pages[0]);
- if (dev_priv->hw_status_page == NULL) {
- DRM_ERROR("Failed to map status page.\n");
- memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
- i915_gem_object_unpin(obj);
- drm_gem_object_unreference(obj);
- return -EINVAL;
- }
- dev_priv->hws_obj = obj;
- memset(dev_priv->hw_status_page, 0, PAGE_SIZE);
- if (IS_GEN6(dev)) {
- I915_WRITE(HWS_PGA_GEN6, dev_priv->status_gfx_addr);
- I915_READ(HWS_PGA_GEN6); /* posting read */
- } else {
- I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr);
- I915_READ(HWS_PGA); /* posting read */
- }
- DRM_DEBUG_DRIVER("hws offset: 0x%08x\n", dev_priv->status_gfx_addr);
-
- return 0;
-}
-
-static void
-i915_gem_cleanup_hws(struct drm_device *dev)
-{
- drm_i915_private_t *dev_priv = dev->dev_private;
- struct drm_gem_object *obj;
- struct drm_i915_gem_object *obj_priv;
-
- if (dev_priv->hws_obj == NULL)
- return;
-
- obj = dev_priv->hws_obj;
- obj_priv = to_intel_bo(obj);
-
- kunmap(obj_priv->pages[0]);
- i915_gem_object_unpin(obj);
- drm_gem_object_unreference(obj);
- dev_priv->hws_obj = NULL;
-
- memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
- dev_priv->hw_status_page = NULL;
-
- /* Write high address into HWS_PGA when disabling. */
- I915_WRITE(HWS_PGA, 0x1ffff000);
-}
-
int
i915_gem_init_ringbuffer(struct drm_device *dev)
{
drm_i915_private_t *dev_priv = dev->dev_private;
- struct drm_gem_object *obj;
- struct drm_i915_gem_object *obj_priv;
- drm_i915_ring_buffer_t *ring = &dev_priv->ring;
int ret;
- u32 head;
-
- ret = i915_gem_init_hws(dev);
- if (ret != 0)
- return ret;
-
- obj = drm_gem_object_alloc(dev, 128 * 1024);
- if (obj == NULL) {
- DRM_ERROR("Failed to allocate ringbuffer\n");
- i915_gem_cleanup_hws(dev);
- return -ENOMEM;
- }
- obj_priv = to_intel_bo(obj);
-
- ret = i915_gem_object_pin(obj, 4096);
- if (ret != 0) {
- drm_gem_object_unreference(obj);
- i915_gem_cleanup_hws(dev);
- return ret;
- }
-
- /* Set up the kernel mapping for the ring. */
- ring->Size = obj->size;
-
- ring->map.offset = dev->agp->base + obj_priv->gtt_offset;
- ring->map.size = obj->size;
- ring->map.type = 0;
- ring->map.flags = 0;
- ring->map.mtrr = 0;
- drm_core_ioremap_wc(&ring->map, dev);
- if (ring->map.handle == NULL) {
- DRM_ERROR("Failed to map ringbuffer.\n");
- memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
- i915_gem_object_unpin(obj);
- drm_gem_object_unreference(obj);
- i915_gem_cleanup_hws(dev);
- return -EINVAL;
- }
- ring->ring_obj = obj;
- ring->virtual_start = ring->map.handle;
-
- /* Stop the ring if it's running. */
- I915_WRITE(PRB0_CTL, 0);
- I915_WRITE(PRB0_TAIL, 0);
- I915_WRITE(PRB0_HEAD, 0);
-
- /* Initialize the ring. */
- I915_WRITE(PRB0_START, obj_priv->gtt_offset);
- head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
-
- /* G45 ring initialization fails to reset head to zero */
- if (head != 0) {
- DRM_ERROR("Ring head not reset to zero "
- "ctl %08x head %08x tail %08x start %08x\n",
- I915_READ(PRB0_CTL),
- I915_READ(PRB0_HEAD),
- I915_READ(PRB0_TAIL),
- I915_READ(PRB0_START));
- I915_WRITE(PRB0_HEAD, 0);
-
- DRM_ERROR("Ring head forced to zero "
- "ctl %08x head %08x tail %08x start %08x\n",
- I915_READ(PRB0_CTL),
- I915_READ(PRB0_HEAD),
- I915_READ(PRB0_TAIL),
- I915_READ(PRB0_START));
- }
-
- I915_WRITE(PRB0_CTL,
- ((obj->size - 4096) & RING_NR_PAGES) |
- RING_NO_REPORT |
- RING_VALID);
-
- head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
-
- /* If the head is still not zero, the ring is dead */
- if (head != 0) {
- DRM_ERROR("Ring initialization failed "
- "ctl %08x head %08x tail %08x start %08x\n",
- I915_READ(PRB0_CTL),
- I915_READ(PRB0_HEAD),
- I915_READ(PRB0_TAIL),
- I915_READ(PRB0_START));
- return -EIO;
- }
-
- /* Update our cache of the ring state */
- if (!drm_core_check_feature(dev, DRIVER_MODESET))
- i915_kernel_lost_context(dev);
- else {
- ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
- ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR;
- ring->space = ring->head - (ring->tail + 8);
- if (ring->space < 0)
- ring->space += ring->Size;
+ dev_priv->render_ring = render_ring;
+ if (!I915_NEED_GFX_HWS(dev)) {
+ dev_priv->render_ring.status_page.page_addr
+ = dev_priv->status_page_dmah->vaddr;
+ memset(dev_priv->render_ring.status_page.page_addr
+ 0, PAGE_SIZE);
}
-
- if (IS_I9XX(dev) && !IS_GEN3(dev)) {
- I915_WRITE(MI_MODE,
- (VS_TIMER_DISPATCH) << 16 | VS_TIMER_DISPATCH);
- }
-
- return 0;
+ ret = intel_init_ring_buffer(dev, &dev_priv->render_ring);
+ return ret;
}
void
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
{
drm_i915_private_t *dev_priv = dev->dev_private;
-
- if (dev_priv->ring.ring_obj == NULL)
- return;
-
- drm_core_ioremapfree(&dev_priv->ring.map, dev);
-
- i915_gem_object_unpin(dev_priv->ring.ring_obj);
- drm_gem_object_unreference(dev_priv->ring.ring_obj);
- dev_priv->ring.ring_obj = NULL;
- memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
-
- i915_gem_cleanup_hws(dev);
+ intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
}
int
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 301702a..1e3299a 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -531,17 +531,18 @@ i915_ringbuffer_last_batch(struct drm_device *dev)
*/
bbaddr = 0;
head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
- ring = (u32 *)(dev_priv->ring.virtual_start + head);
+ ring = (u32 *)(dev_priv->render_ring.virtual_start + head);
- while (--ring >= (u32 *)dev_priv->ring.virtual_start) {
+ while (--ring >= (u32 *)dev_priv->render_ring.virtual_start) {
bbaddr = i915_get_bbaddr(dev, ring);
if (bbaddr)
break;
}
if (bbaddr == 0) {
- ring = (u32 *)(dev_priv->ring.virtual_start + dev_priv->ring.Size);
- while (--ring >= (u32 *)dev_priv->ring.virtual_start) {
+ ring = (u32 *)(dev_priv->render_ring.virtual_start
+ + dev_priv->render_ring.size);
+ while (--ring >= (u32 *)dev_priv->render_ring.virtual_start) {
bbaddr = i915_get_bbaddr(dev, ring);
if (bbaddr)
break;
@@ -551,6 +552,7 @@ i915_ringbuffer_last_batch(struct drm_device *dev)
return bbaddr;
}
+
/**
* i915_capture_error_state - capture an error record for later analysis
* @dev: drm device
@@ -634,7 +636,8 @@ static void i915_capture_error_state(struct drm_device *dev)
error->batchbuffer[1] = i915_error_object_create(dev, batchbuffer[1]);
/* Record the ringbuffer */
- error->ringbuffer = i915_error_object_create(dev, dev_priv->ring.ring_obj);
+ error->ringbuffer = i915_error_object_create(dev,
+ dev_priv->render_ring.gem_object);
/* Record buffers on the active list. */
error->active_bo = NULL;
@@ -979,7 +982,6 @@ static int i915_emit_irq(struct drm_device * dev)
{
drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
- RING_LOCALS;
i915_kernel_lost_context(dev);
@@ -1082,7 +1084,7 @@ int i915_irq_emit(struct drm_device *dev, void *data,
drm_i915_irq_emit_t *emit = data;
int result;
- if (!dev_priv || !dev_priv->ring.virtual_start) {
+ if (!dev_priv || !dev_priv->render_ring.virtual_start) {
DRM_ERROR("called with no initialization\n");
return -EINVAL;
}
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 58668c4..edac8cf 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4603,7 +4603,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
unsigned long flags;
int pipesrc_reg = (intel_crtc->pipe == 0) ? PIPEASRC : PIPEBSRC;
int ret, pipesrc;
- RING_LOCALS;
work = kzalloc(sizeof *work, GFP_KERNEL);
if (work == NULL)
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 6d524a1..2e490ad 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -211,10 +211,7 @@ static void intel_overlay_unmap_regs_atomic(struct intel_overlay *overlay)
static int intel_overlay_on(struct intel_overlay *overlay)
{
struct drm_device *dev = overlay->dev;
- drm_i915_private_t *dev_priv = dev->dev_private;
int ret;
- RING_LOCALS;
-
BUG_ON(overlay->active);
overlay->active = 1;
@@ -248,7 +245,6 @@ static void intel_overlay_continue(struct intel_overlay *overlay,
drm_i915_private_t *dev_priv = dev->dev_private;
u32 flip_addr = overlay->flip_addr;
u32 tmp;
- RING_LOCALS;
BUG_ON(!overlay->active);
@@ -274,7 +270,6 @@ static int intel_overlay_wait_flip(struct intel_overlay *overlay)
drm_i915_private_t *dev_priv = dev->dev_private;
int ret;
u32 tmp;
- RING_LOCALS;
if (overlay->last_flip_req != 0) {
ret = i915_do_wait_request(dev, overlay->last_flip_req, 1);
@@ -314,9 +309,7 @@ static int intel_overlay_off(struct intel_overlay *overlay)
{
u32 flip_addr = overlay->flip_addr;
struct drm_device *dev = overlay->dev;
- drm_i915_private_t *dev_priv = dev->dev_private;
int ret;
- RING_LOCALS;
BUG_ON(!overlay->active);
@@ -390,11 +383,9 @@ int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay,
int interruptible)
{
struct drm_device *dev = overlay->dev;
- drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_gem_object *obj;
u32 flip_addr;
int ret;
- RING_LOCALS;
if (overlay->hw_wedged == HW_WEDGED)
return -EIO;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 65c540b..33e4fde 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -185,6 +185,15 @@ static int init_render_ring(struct drm_device *dev,
return -EIO;
}
+ if (!drm_core_check_feature(dev, DRIVER_MODESET))
+ i915_kernel_lost_context(dev);
+ else {
+ ring->head = ring->get_head(dev, ring);
+ ring->tail = ring->get_tail(dev, ring);
+ ring->space = ring->head - (ring->tail + 8);
+ if (ring->space < 0)
+ ring->space += ring->size;
+ }
return 0;
}
@@ -419,11 +428,16 @@ int intel_init_ring_buffer(struct drm_device *dev,
return ret;
}
- ring->head = ring->get_head(dev, ring);
- ring->tail = ring->get_tail(dev, ring);
- ring->space = ring->head - (ring->tail + 8);
- if (ring->space < 0)
- ring->space += ring->size;
+ if (!drm_core_check_feature(dev, DRIVER_MODESET))
+ i915_kernel_lost_context(dev);
+ else {
+ ring->head = ring->get_head(dev, ring);
+ ring->tail = ring->get_tail(dev, ring);
+ ring->space = ring->head - (ring->tail + 8);
+ if (ring->space < 0)
+ ring->space += ring->size;
+ }
+
INIT_LIST_HEAD(&ring->active_list);
INIT_LIST_HEAD(&ring->request_list);
return ret;
--
1.7.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH 3/4] add BSD ring buffer support
2010-05-06 1:20 [PATCH 1/4] introduce intel_ring_buffer structure Zou Nan hai
2010-05-06 1:20 ` [PATCH 2/4] convert render engine to use intel_ring_buffer Zou Nan hai
@ 2010-05-06 1:20 ` Zou Nan hai
2010-05-06 1:20 ` [PATCH 4/4] adapt intel_ring_buffer into gem Zou Nan hai
` (2 subsequent siblings)
4 siblings, 0 replies; 24+ messages in thread
From: Zou Nan hai @ 2010-05-06 1:20 UTC (permalink / raw)
To: Anholt Eric, Intel GFX
add BSD ring buffer support on GM45+ systems.
BSD (bit-stream-decoder) engine is a GPU engine for H.264/VC1
VLD decoding.
Signed-off-by: Xiang Haihao <haihao.xiang@intel.com>
Signed-off-by: Zou Nanhai <nanhai.zou@intel.com>
---
drivers/gpu/drm/i915/i915_dma.c | 2 +
drivers/gpu/drm/i915/i915_drv.h | 2 +
drivers/gpu/drm/i915/i915_gem.c | 76 ++--------
drivers/gpu/drm/i915/i915_reg.h | 16 ++
drivers/gpu/drm/i915/intel_ringbuffer.c | 232 ++++++++++++++++++++++++++++---
drivers/gpu/drm/i915/intel_ringbuffer.h | 6 +
6 files changed, 252 insertions(+), 82 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 88ec88c..063f355 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -128,6 +128,8 @@ static int i915_dma_cleanup(struct drm_device * dev)
drm_irq_uninstall(dev);
intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
+ if (HAS_BSD(dev))
+ intel_cleanup_ring_buffer(dev, &dev_priv->bsd_ring);
/* Clear the HWS virtual address at teardown */
if (I915_NEED_GFX_HWS(dev))
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 83d6e9b..ee0a15e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -238,6 +238,7 @@ typedef struct drm_i915_private {
struct pci_dev *bridge_dev;
struct intel_ring_buffer render_ring;
+ struct intel_ring_buffer bsd_ring;
drm_dma_handle_t *status_page_dmah;
@@ -1120,6 +1121,7 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);
(dev)->pci_device == 0x2A42 || \
(dev)->pci_device == 0x2E42)
+#define HAS_BSD(dev) (IS_IRONLAKE(dev) || IS_G4X(dev))
#define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws)
/* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7b8fa88..450c67e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1887,70 +1887,17 @@ i915_gem_flush(struct drm_device *dev,
uint32_t flush_domains)
{
drm_i915_private_t *dev_priv = dev->dev_private;
- uint32_t cmd;
-
-#if WATCH_EXEC
- DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
- invalidate_domains, flush_domains);
-#endif
- trace_i915_gem_request_flush(dev, dev_priv->mm.next_gem_seqno,
- invalidate_domains, flush_domains);
-
if (flush_domains & I915_GEM_DOMAIN_CPU)
drm_agp_chipset_flush(dev);
- if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) {
- /*
- * read/write caches:
- *
- * I915_GEM_DOMAIN_RENDER is always invalidated, but is
- * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
- * also flushed at 2d versus 3d pipeline switches.
- *
- * read-only caches:
- *
- * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
- * MI_READ_FLUSH is set, and is always flushed on 965.
- *
- * I915_GEM_DOMAIN_COMMAND may not exist?
- *
- * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
- * invalidated when MI_EXE_FLUSH is set.
- *
- * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
- * invalidated with every MI_FLUSH.
- *
- * TLBs:
- *
- * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
- * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
- * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
- * are flushed at any MI_FLUSH.
- */
-
- cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
- if ((invalidate_domains|flush_domains) &
- I915_GEM_DOMAIN_RENDER)
- cmd &= ~MI_NO_WRITE_FLUSH;
- if (!IS_I965G(dev)) {
- /*
- * On the 965, the sampler cache always gets flushed
- * and this bit is reserved.
- */
- if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
- cmd |= MI_READ_FLUSH;
- }
- if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
- cmd |= MI_EXE_FLUSH;
+ dev_priv->render_ring.flush(dev, &dev_priv->render_ring,
+ invalidate_domains,
+ flush_domains);
-#if WATCH_EXEC
- DRM_INFO("%s: queue flush %08x to ring\n", __func__, cmd);
-#endif
- BEGIN_LP_RING(2);
- OUT_RING(cmd);
- OUT_RING(MI_NOOP);
- ADVANCE_LP_RING();
- }
+ if (HAS_BSD(dev))
+ dev_priv->bsd_ring.flush(dev, &dev_priv->bsd_ring,
+ invalidate_domains,
+ flush_domains);
}
/**
@@ -4552,10 +4499,14 @@ i915_gem_init_ringbuffer(struct drm_device *dev)
if (!I915_NEED_GFX_HWS(dev)) {
dev_priv->render_ring.status_page.page_addr
= dev_priv->status_page_dmah->vaddr;
- memset(dev_priv->render_ring.status_page.page_addr
+ memset(dev_priv->render_ring.status_page.page_addr,
0, PAGE_SIZE);
}
ret = intel_init_ring_buffer(dev, &dev_priv->render_ring);
+ if (!ret && HAS_BSD(dev)) {
+ dev_priv->bsd_ring = bsd_ring;
+ ret = intel_init_ring_buffer(dev, &dev_priv->bsd_ring);
+ }
return ret;
}
@@ -4564,6 +4515,9 @@ i915_gem_cleanup_ringbuffer(struct drm_device *dev)
{
drm_i915_private_t *dev_priv = dev->dev_private;
intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
+ if (HAS_BSD(dev))
+ intel_cleanup_ring_buffer(dev, &dev_priv->bsd_ring);
+
}
int
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 527d30a..64f6bba 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -324,6 +324,7 @@
#define I915_DEBUG_INTERRUPT (1<<2)
#define I915_USER_INTERRUPT (1<<1)
#define I915_ASLE_INTERRUPT (1<<0)
+#define I915_BSD_USER_INTERRUPT (1<<25)
#define EIR 0x020b0
#define EMR 0x020b4
#define ESR 0x020b8
@@ -358,6 +359,18 @@
#define BB_ADDR 0x02140 /* 8 bytes */
#define GFX_FLSH_CNTL 0x02170 /* 915+ only */
+/*
+ * BSD (bit stream decoder instruction and interrupt control register defines
+ * (G4X and Ironlake only)
+ */
+
+#define BSD_RING_TAIL 0x04030
+#define BSD_RING_HEAD 0x04034
+#define BSD_RING_START 0x04038
+#define BSD_RING_CTL 0x0403c
+#define BSD_RING_ACTHD 0x04074
+#define BSD_HWS_PGA 0x04080
+
/*
* Framebuffer compression (915+ only)
@@ -2342,6 +2355,9 @@
#define GT_SYNC_STATUS (1 << 2)
#define GT_USER_INTERRUPT (1 << 0)
+#define GT_BSD_USER_INTERRUPT (1 << 5)
+
+
#define GTISR 0x44010
#define GTIMR 0x44014
#define GTIIR 0x44018
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 33e4fde..4df70fe 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -130,8 +130,7 @@ static void render_ring_advance_ring(struct drm_device *dev,
I915_WRITE(PRB0_TAIL, ring->tail);
}
-
-static int init_render_ring(struct drm_device *dev,
+static int init_ring_common(struct drm_device *dev,
struct intel_ring_buffer *ring)
{
u32 head;
@@ -140,12 +139,12 @@ static int init_render_ring(struct drm_device *dev,
obj_priv = ring->gem_object->driver_private;
/* Stop the ring if it's running. */
- I915_WRITE(PRB0_CTL, 0);
- I915_WRITE(PRB0_HEAD, 0);
- I915_WRITE(PRB0_TAIL, 0);
+ I915_WRITE(ring->regs.ctl, 0);
+ I915_WRITE(ring->regs.head, 0);
+ I915_WRITE(ring->regs.tail, 0);
/* Initialize the ring. */
- I915_WRITE(PRB0_START, obj_priv->gtt_offset);
+ I915_WRITE(ring->regs.start, obj_priv->gtt_offset);
head = ring->get_head(dev, ring);
/* G45 ring initialization fails to reset head to zero */
@@ -153,35 +152,36 @@ static int init_render_ring(struct drm_device *dev,
DRM_ERROR("%s head not reset to zero "
"ctl %08x head %08x tail %08x start %08x\n",
ring->name,
- I915_READ(PRB0_CTL),
- I915_READ(PRB0_HEAD),
- I915_READ(PRB0_TAIL),
- I915_READ(PRB0_START));
- I915_WRITE(PRB0_HEAD, 0);
+ I915_READ(ring->regs.ctl),
+ I915_READ(ring->regs.head),
+ I915_READ(ring->regs.tail),
+ I915_READ(ring->regs.start));
+
+ I915_WRITE(ring->regs.head, 0);
DRM_ERROR("%s head forced to zero "
"ctl %08x head %08x tail %08x start %08x\n",
ring->name,
- I915_READ(PRB0_CTL),
- I915_READ(PRB0_HEAD),
- I915_READ(PRB0_TAIL),
- I915_READ(PRB0_START));
+ I915_READ(ring->regs.ctl),
+ I915_READ(ring->regs.head),
+ I915_READ(ring->regs.tail),
+ I915_READ(ring->regs.start));
}
- I915_WRITE(PRB0_CTL,
+ I915_WRITE(ring->regs.ctl,
((ring->gem_object->size - PAGE_SIZE) & RING_NR_PAGES)
| RING_NO_REPORT | RING_VALID);
- head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
+ head = I915_READ(ring->regs.head) & HEAD_ADDR;
/* If the head is still not zero, the ring is dead */
if (head != 0) {
DRM_ERROR("%s initialization failed "
"ctl %08x head %08x tail %08x start %08x\n",
ring->name,
- I915_READ(PRB0_CTL),
- I915_READ(PRB0_HEAD),
- I915_READ(PRB0_TAIL),
- I915_READ(PRB0_START));
+ I915_READ(ring->regs.ctl),
+ I915_READ(ring->regs.head),
+ I915_READ(ring->regs.tail),
+ I915_READ(ring->regs.start));
return -EIO;
}
@@ -197,6 +197,12 @@ static int init_render_ring(struct drm_device *dev,
return 0;
}
+static int init_render_ring(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ return init_ring_common(dev, ring);
+}
+
static u32
render_ring_add_request(struct drm_device *dev,
@@ -273,6 +279,143 @@ static void render_setup_status_page(struct drm_device *dev,
(void)I915_READ(HWS_PGA);
}
+void
+bsd_ring_flush(struct drm_device *dev,
+ struct intel_ring_buffer *ring,
+ u32 invalidate_domains,
+ u32 flush_domains)
+{
+ intel_ring_begin(dev, ring, 8);
+ intel_ring_emit(dev, ring, MI_FLUSH);
+ intel_ring_emit(dev, ring, MI_NOOP);
+ intel_ring_advance(dev, ring);
+}
+
+static inline unsigned int bsd_ring_get_head(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ return I915_READ(BSD_RING_HEAD) & HEAD_ADDR;
+}
+
+static inline unsigned int bsd_ring_get_tail(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ return I915_READ(BSD_RING_TAIL) & TAIL_ADDR;
+}
+
+static inline unsigned int bsd_ring_get_active_head(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ return I915_READ(BSD_RING_ACTHD);
+}
+
+static inline void bsd_ring_advance_ring(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ I915_WRITE(BSD_RING_TAIL, ring->tail);
+}
+
+static int init_bsd_ring(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ return init_ring_common(dev, ring);
+}
+
+static u32
+bsd_ring_add_request(struct drm_device *dev,
+ struct intel_ring_buffer *ring,
+ struct drm_file *file_priv,
+ u32 flush_domains)
+{
+ u32 seqno;
+ seqno = intel_ring_get_seqno(dev, ring);
+ intel_ring_begin(dev, ring, 4);
+ intel_ring_emit(dev, ring, MI_STORE_DWORD_INDEX);
+ intel_ring_emit(dev, ring,
+ I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
+ intel_ring_emit(dev, ring, seqno);
+ intel_ring_emit(dev, ring, MI_USER_INTERRUPT);
+ intel_ring_advance(dev, ring);
+
+ DRM_DEBUG_DRIVER("%s %d\n", ring->name, seqno);
+
+ return seqno;
+}
+
+static void bsd_setup_status_page(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ I915_WRITE(BSD_HWS_PGA, ring->status_page.gfx_addr);
+ I915_READ(BSD_HWS_PGA);
+}
+
+static void
+bsd_ring_get_user_irq(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags);
+ if (dev->irq_enabled && (++ring->user_irq_refcount == 1)) {
+ if (IS_IRONLAKE(dev))
+ ironlake_enable_graphics_irq(dev_priv,
+ GT_BSD_USER_INTERRUPT);
+ else
+ i915_enable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
+ }
+ spin_unlock_irqrestore(&dev_priv->user_irq_lock, irqflags);
+
+}
+
+static void
+bsd_ring_put_user_irq(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags);
+ BUG_ON(dev->irq_enabled && ring->user_irq_refcount <= 0);
+ if (dev->irq_enabled && (--ring->user_irq_refcount == 0)) {
+ if (IS_IRONLAKE(dev))
+ ironlake_disable_graphics_irq(dev_priv,
+ GT_BSD_USER_INTERRUPT);
+ else
+ i915_disable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
+ }
+ spin_unlock_irqrestore(&dev_priv->user_irq_lock, irqflags);
+}
+
+static u32
+bsd_ring_get_gem_seqno(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
+{
+ return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
+}
+
+static int
+bsd_ring_dispatch_gem_execbuffer(struct drm_device *dev,
+ struct intel_ring_buffer *ring,
+ struct drm_i915_gem_execbuffer2 *exec,
+ struct drm_clip_rect *cliprects,
+ uint64_t exec_offset)
+{
+ uint32_t exec_start;
+ exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
+ intel_ring_begin(dev, ring, 2);
+ intel_ring_emit(dev, ring, MI_BATCH_BUFFER_START |
+ (2 << 6) | MI_BATCH_NON_SECURE_I965);
+ intel_ring_emit(dev, ring, exec_start);
+ intel_ring_advance(dev, ring);
+ return 0;
+}
+
static int
render_ring_dispatch_gem_execbuffer(struct drm_device *dev,
@@ -340,6 +483,7 @@ static void cleanup_status_page(struct drm_device *dev, struct intel_ring_buffer
memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
}
+
static int init_status_page(struct drm_device *dev, struct intel_ring_buffer *ring)
{
struct drm_i915_gem_object *obj_priv;
@@ -560,6 +704,12 @@ u32 intel_ring_get_seqno(struct drm_device *dev,
struct intel_ring_buffer render_ring = {
.name = "render ring",
+ .regs = {
+ .ctl = PRB0_CTL,
+ .head = PRB0_HEAD,
+ .tail = PRB0_TAIL,
+ .start = PRB0_START
+ },
.ring_flag = I915_EXEC_RENDER,
.size = 32 * PAGE_SIZE,
.alignment = PAGE_SIZE,
@@ -588,3 +738,43 @@ struct intel_ring_buffer render_ring = {
.status_page = {NULL, 0, NULL},
.map = {0,}
};
+
+/* ring buffer for bit-stream decoder */
+
+struct intel_ring_buffer bsd_ring = {
+ .name = "bsd ring",
+ .regs = {
+ .ctl = BSD_RING_CTL,
+ .head = BSD_RING_HEAD,
+ .tail = BSD_RING_TAIL,
+ .start = BSD_RING_START
+ },
+ .ring_flag = I915_EXEC_BSD,
+ .size = 32 * PAGE_SIZE,
+ .alignment = PAGE_SIZE,
+ .virtual_start = NULL,
+ .dev = NULL,
+ .gem_object = NULL,
+ .head = 0,
+ .tail = 0,
+ .space = 0,
+ .next_seqno = 1,
+ .user_irq_refcount = 0,
+ .irq_gem_seqno = 0,
+ .waiting_gem_seqno = 0,
+ .setup_status_page = bsd_setup_status_page,
+ .init = init_bsd_ring,
+ .get_head = bsd_ring_get_head,
+ .get_tail = bsd_ring_get_tail,
+ .get_active_head = bsd_ring_get_active_head,
+ .advance_ring = bsd_ring_advance_ring,
+ .flush = bsd_ring_flush,
+ .add_request = bsd_ring_add_request,
+ .get_gem_seqno = bsd_ring_get_gem_seqno,
+ .user_irq_get = bsd_ring_get_user_irq,
+ .user_irq_put = bsd_ring_put_user_irq,
+ .dispatch_gem_execbuffer = bsd_ring_dispatch_gem_execbuffer,
+ .status_page = {NULL, 0, NULL},
+ .map = {0,}
+};
+
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index e3b40d1..b1748b6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -10,6 +10,12 @@ struct intel_hw_status_page {
struct drm_i915_gem_execbuffer2;
struct intel_ring_buffer {
const char *name;
+ struct ring_regs {
+ u32 ctl;
+ u32 head;
+ u32 tail;
+ u32 start;
+ } regs;
unsigned int ring_flag;
unsigned long size;
unsigned int alignment;
--
1.7.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH 4/4] adapt intel_ring_buffer into gem
2010-05-06 1:20 [PATCH 1/4] introduce intel_ring_buffer structure Zou Nan hai
2010-05-06 1:20 ` [PATCH 2/4] convert render engine to use intel_ring_buffer Zou Nan hai
2010-05-06 1:20 ` [PATCH 3/4] add BSD ring buffer support Zou Nan hai
@ 2010-05-06 1:20 ` Zou Nan hai
2010-05-07 21:34 ` [PATCH 1/4] introduce intel_ring_buffer structure Eric Anholt
2010-05-11 22:24 ` Daniel Vetter
4 siblings, 0 replies; 24+ messages in thread
From: Zou Nan hai @ 2010-05-06 1:20 UTC (permalink / raw)
To: Anholt Eric, Intel GFX
remove single active_list amd request_list, seperate them into
every intel_ring_buffer structure.
use a flag in execbuffer2 to decide on which ring to run the
command
legacy ioctl are consider to run on render ring by default
Signed-off-by: Xiang Haihao <haihao.xiang@intel.com>
Signed-off-by: Zou Nanhai <nanhai.zou@intel.com>
---
drivers/gpu/drm/i915/i915_debugfs.c | 23 ++-
drivers/gpu/drm/i915/i915_dma.c | 4 +-
drivers/gpu/drm/i915/i915_drv.c | 5 +-
drivers/gpu/drm/i915/i915_drv.h | 64 ++----
drivers/gpu/drm/i915/i915_gem.c | 355 ++++++++++++++++++---------------
drivers/gpu/drm/i915/i915_gem_debug.c | 11 +-
drivers/gpu/drm/i915/i915_irq.c | 98 ++++-----
drivers/gpu/drm/i915/intel_overlay.c | 45 +++--
8 files changed, 322 insertions(+), 283 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index ea81ec1..7bf08c5 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -76,7 +76,7 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data)
case ACTIVE_LIST:
seq_printf(m, "Active:\n");
lock = &dev_priv->mm.active_list_lock;
- head = &dev_priv->mm.active_list;
+ head = &dev_priv->render_ring.active_list;
break;
case INACTIVE_LIST:
seq_printf(m, "Inactive:\n");
@@ -129,7 +129,8 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
struct drm_i915_gem_request *gem_request;
seq_printf(m, "Request:\n");
- list_for_each_entry(gem_request, &dev_priv->mm.request_list, list) {
+ list_for_each_entry(gem_request,
+ &dev_priv->render_ring.request_list, list) {
seq_printf(m, " %d @ %d\n",
gem_request->seqno,
(int) (jiffies - gem_request->emitted_jiffies));
@@ -145,13 +146,14 @@ static int i915_gem_seqno_info(struct seq_file *m, void *data)
if (dev_priv->render_ring.status_page.page_addr != NULL) {
seq_printf(m, "Current sequence: %d\n",
- i915_get_gem_seqno(dev));
+ i915_get_gem_seqno(dev, &dev_priv->render_ring));
} else {
seq_printf(m, "Current sequence: hws uninitialized\n");
}
seq_printf(m, "Waiter sequence: %d\n",
- dev_priv->mm.waiting_gem_seqno);
- seq_printf(m, "IRQ sequence: %d\n", dev_priv->mm.irq_gem_seqno);
+ dev_priv->render_ring.waiting_gem_seqno);
+ seq_printf(m, "IRQ sequence: %d\n",
+ dev_priv->render_ring.irq_gem_seqno);
return 0;
}
@@ -197,14 +199,14 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
atomic_read(&dev_priv->irq_received));
if (dev_priv->render_ring.status_page.page_addr != NULL) {
seq_printf(m, "Current sequence: %d\n",
- i915_get_gem_seqno(dev));
+ i915_get_gem_seqno(dev, &dev_priv->render_ring));
} else {
seq_printf(m, "Current sequence: hws uninitialized\n");
}
seq_printf(m, "Waiter sequence: %d\n",
- dev_priv->mm.waiting_gem_seqno);
+ dev_priv->render_ring.waiting_gem_seqno);
seq_printf(m, "IRQ sequence: %d\n",
- dev_priv->mm.irq_gem_seqno);
+ dev_priv->render_ring.irq_gem_seqno);
return 0;
}
@@ -287,7 +289,8 @@ static int i915_batchbuffer_info(struct seq_file *m, void *data)
spin_lock(&dev_priv->mm.active_list_lock);
- list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) {
+ list_for_each_entry(obj_priv,
+ &dev_priv->render_ring.active_list, list) {
obj = obj_priv->obj;
if (obj->read_domains & I915_GEM_DOMAIN_COMMAND) {
ret = i915_gem_object_get_pages(obj, 0);
@@ -682,7 +685,7 @@ i915_wedged_write(struct file *filp,
atomic_set(&dev_priv->mm.wedged, val);
if (val) {
- DRM_WAKEUP(&dev_priv->irq_queue);
+ DRM_WAKEUP(&dev_priv->render_ring.irq_queue);
queue_work(dev_priv->wq, &dev_priv->error_work);
}
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 063f355..0c0e1d7 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -81,7 +81,8 @@ static void i915_free_hws(struct drm_device *dev)
dev_priv->status_page_dmah = NULL;
}
- if (dev_priv->status_gfx_addr) {
+ if (dev_priv->render_ring.status_page.gfx_addr) {
+ dev_priv->render_ring.status_page.gfx_addr = 0;
dev_priv->status_gfx_addr = 0;
drm_core_ioremapfree(&dev_priv->hws_map, dev);
}
@@ -1616,7 +1617,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
spin_lock_init(&dev_priv->user_irq_lock);
spin_lock_init(&dev_priv->error_lock);
- dev_priv->user_irq_refcount = 0;
dev_priv->trace_irq_seqno = 0;
ret = drm_vblank_init(dev, I915_NUM_PIPE);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index be0f31b..a67680e 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -339,7 +339,9 @@ int i965_reset(struct drm_device *dev, u8 flags)
/*
* Clear request list
*/
- i915_gem_retire_requests(dev);
+
+ i915_gem_retire_requests(dev, &dev_priv->render_ring);
+ i915_gem_retire_requests(dev, &dev_priv->bsd_ring);
if (need_display)
i915_save_display(dev);
@@ -388,6 +390,7 @@ int i965_reset(struct drm_device *dev, u8 flags)
*/
if (drm_core_check_feature(dev, DRIVER_MODESET) ||
!dev_priv->mm.suspended) {
+
struct intel_ring_buffer *ring = &dev_priv->render_ring;
dev_priv->mm.suspended = 0;
ring->init(dev, ring);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ee0a15e..c066097 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -83,6 +83,7 @@ enum plane {
#define I915_GEM_PHYS_OVERLAY_REGS 3
#define I915_MAX_PHYS_OBJECT (I915_GEM_PHYS_OVERLAY_REGS)
+
struct drm_i915_gem_phys_object {
int id;
struct page **page_list;
@@ -237,9 +238,10 @@ typedef struct drm_i915_private {
void __iomem *regs;
struct pci_dev *bridge_dev;
+
struct intel_ring_buffer render_ring;
- struct intel_ring_buffer bsd_ring;
+ struct intel_ring_buffer bsd_ring;
drm_dma_handle_t *status_page_dmah;
dma_addr_t dma_status_page;
@@ -257,12 +259,10 @@ typedef struct drm_i915_private {
int current_page;
int page_flipping;
- wait_queue_head_t irq_queue;
atomic_t irq_received;
/** Protects user_irq_refcount and irq_mask_reg */
spinlock_t user_irq_lock;
- /** Refcount for i915_user_irq_get() versus i915_user_irq_put(). */
- int user_irq_refcount;
+
u32 trace_irq_seqno;
/** Cached value of IMR to avoid reads in updating the bitfield */
u32 irq_mask_reg;
@@ -508,18 +508,7 @@ typedef struct drm_i915_private {
*/
struct list_head shrink_list;
- /**
- * List of objects currently involved in rendering from the
- * ringbuffer.
- *
- * Includes buffers having the contents of their GPU caches
- * flushed, not necessarily primitives. last_rendering_seqno
- * represents when the rendering involved will be completed.
- *
- * A reference is held on the buffer while on this list.
- */
spinlock_t active_list_lock;
- struct list_head active_list;
/**
* List of objects which are not in the ringbuffer but which
@@ -557,12 +546,6 @@ typedef struct drm_i915_private {
struct list_head fence_list;
/**
- * List of breadcrumbs associated with GPU requests currently
- * outstanding.
- */
- struct list_head request_list;
-
- /**
* We leave the user IRQ off as much as possible,
* but this means that requests will finish and never
* be retired once the system goes idle. Set a timer to
@@ -571,18 +554,6 @@ typedef struct drm_i915_private {
*/
struct delayed_work retire_work;
- uint32_t next_gem_seqno;
-
- /**
- * Waiting sequence number, if any
- */
- uint32_t waiting_gem_seqno;
-
- /**
- * Last seq seen at irq time
- */
- uint32_t irq_gem_seqno;
-
/**
* Flag if the X Server, and thus DRM, is not currently in
* control of the device.
@@ -735,6 +706,9 @@ struct drm_i915_gem_object {
*/
int madv;
+ /* Which ring refers to is this object */
+ struct intel_ring_buffer *ring;
+
/**
* Number of crtcs where this object is currently the fb, but
* will be page flipped away on the next vblank. When it
@@ -756,6 +730,9 @@ struct drm_i915_gem_object {
* an emission time with seqnos for tracking how far ahead of the GPU we are.
*/
struct drm_i915_gem_request {
+ /** On Which ring this request is generated */
+ struct intel_ring_buffer *ring;
+
/** GEM sequence number associated with this request. */
uint32_t seqno;
@@ -820,9 +797,7 @@ extern int i915_irq_emit(struct drm_device *dev, void *data,
struct drm_file *file_priv);
extern int i915_irq_wait(struct drm_device *dev, void *data,
struct drm_file *file_priv);
-void i915_user_irq_get(struct drm_device *dev);
void i915_trace_irq_get(struct drm_device *dev, u32 seqno);
-void i915_user_irq_put(struct drm_device *dev);
extern void i915_enable_interrupt (struct drm_device *dev);
extern irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS);
@@ -846,7 +821,6 @@ extern void ironlake_enable_graphics_irq(drm_i915_private_t *dev_priv,
extern void ironlake_disable_graphics_irq(drm_i915_private_t *dev_priv,
u32 mask);
-
void
i915_enable_pipestat(drm_i915_private_t *dev_priv, int pipe, u32 mask);
@@ -917,11 +891,13 @@ void i915_gem_object_unpin(struct drm_gem_object *obj);
int i915_gem_object_unbind(struct drm_gem_object *obj);
void i915_gem_release_mmap(struct drm_gem_object *obj);
void i915_gem_lastclose(struct drm_device *dev);
-uint32_t i915_get_gem_seqno(struct drm_device *dev);
+uint32_t i915_get_gem_seqno(struct drm_device *dev,
+ struct intel_ring_buffer *ring);
bool i915_seqno_passed(uint32_t seq1, uint32_t seq2);
int i915_gem_object_get_fence_reg(struct drm_gem_object *obj);
int i915_gem_object_put_fence_reg(struct drm_gem_object *obj);
-void i915_gem_retire_requests(struct drm_device *dev);
+void i915_gem_retire_requests(struct drm_device *dev,
+ struct intel_ring_buffer *ring);
void i915_gem_retire_work_handler(struct work_struct *work);
void i915_gem_clflush_object(struct drm_gem_object *obj);
int i915_gem_object_set_domain(struct drm_gem_object *obj,
@@ -932,9 +908,13 @@ void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
int i915_gem_do_init(struct drm_device *dev, unsigned long start,
unsigned long end);
int i915_gem_idle(struct drm_device *dev);
-uint32_t i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
- uint32_t flush_domains);
-int i915_do_wait_request(struct drm_device *dev, uint32_t seqno, int interruptible);
+int i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
+ int interruptible,
+ struct intel_ring_buffer *ring);
+uint32_t i915_add_request(struct drm_device *dev,
+ struct drm_file *file_priv,
+ uint32_t flush_domains,
+ struct intel_ring_buffer *ring);
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
int i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj,
int write);
@@ -1121,7 +1101,7 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);
(dev)->pci_device == 0x2A42 || \
(dev)->pci_device == 0x2E42)
-#define HAS_BSD(dev) (IS_IRONLAKE(dev) || IS_G4X(dev))
+#define HAS_BSD(dev) (IS_IRONLAKE(dev) || IS_G4X(dev))
#define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws)
/* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 450c67e..ec73a76 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -55,6 +55,8 @@ static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *o
struct drm_i915_gem_pwrite *args,
struct drm_file *file_priv);
+
+
static LIST_HEAD(shrink_list);
static DEFINE_SPINLOCK(shrink_list_lock);
@@ -1481,11 +1483,14 @@ i915_gem_object_put_pages(struct drm_gem_object *obj)
}
static void
-i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno)
+i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno,
+ struct intel_ring_buffer *ring)
{
struct drm_device *dev = obj->dev;
drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
+ BUG_ON(ring == NULL);
+ obj_priv->ring = ring;
/* Add a reference if we're newly entering the active list. */
if (!obj_priv->active) {
@@ -1494,8 +1499,7 @@ i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno)
}
/* Move from whatever list we were on to the tail of execution. */
spin_lock(&dev_priv->mm.active_list_lock);
- list_move_tail(&obj_priv->list,
- &dev_priv->mm.active_list);
+ list_move_tail(&obj_priv->list, &ring->active_list);
spin_unlock(&dev_priv->mm.active_list_lock);
obj_priv->last_rendering_seqno = seqno;
}
@@ -1548,6 +1552,7 @@ i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
BUG_ON(!list_empty(&obj_priv->gpu_write_list));
obj_priv->last_rendering_seqno = 0;
+ obj_priv->ring = NULL;
if (obj_priv->active) {
obj_priv->active = 0;
drm_gem_object_unreference(obj);
@@ -1557,7 +1562,8 @@ i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
static void
i915_gem_process_flushing_list(struct drm_device *dev,
- uint32_t flush_domains, uint32_t seqno)
+ uint32_t flush_domains, uint32_t seqno,
+ struct intel_ring_buffer *ring)
{
drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_i915_gem_object *obj_priv, *next;
@@ -1568,12 +1574,13 @@ i915_gem_process_flushing_list(struct drm_device *dev,
struct drm_gem_object *obj = obj_priv->obj;
if ((obj->write_domain & flush_domains) ==
- obj->write_domain) {
+ obj->write_domain &&
+ obj_priv->ring->ring_flag == ring->ring_flag) {
uint32_t old_write_domain = obj->write_domain;
obj->write_domain = 0;
list_del_init(&obj_priv->gpu_write_list);
- i915_gem_object_move_to_active(obj, seqno);
+ i915_gem_object_move_to_active(obj, seqno, ring);
/* update the fence lru list */
if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
@@ -1597,7 +1604,7 @@ i915_gem_process_flushing_list(struct drm_device *dev,
*/
uint32_t
i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
- uint32_t flush_domains)
+ uint32_t flush_domains, struct intel_ring_buffer *ring)
{
drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_i915_file_private *i915_file_priv = NULL;
@@ -1612,28 +1619,13 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
if (request == NULL)
return 0;
- /* Grab the seqno we're going to make this request be, and bump the
- * next (skipping 0 so it can be the reserved no-seqno value).
- */
- seqno = dev_priv->mm.next_gem_seqno;
- dev_priv->mm.next_gem_seqno++;
- if (dev_priv->mm.next_gem_seqno == 0)
- dev_priv->mm.next_gem_seqno++;
-
- BEGIN_LP_RING(4);
- OUT_RING(MI_STORE_DWORD_INDEX);
- OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
- OUT_RING(seqno);
-
- OUT_RING(MI_USER_INTERRUPT);
- ADVANCE_LP_RING();
-
- DRM_DEBUG_DRIVER("%d\n", seqno);
+ seqno = ring->add_request(dev, ring, file_priv, flush_domains);
request->seqno = seqno;
+ request->ring = ring;
request->emitted_jiffies = jiffies;
- was_empty = list_empty(&dev_priv->mm.request_list);
- list_add_tail(&request->list, &dev_priv->mm.request_list);
+ was_empty = list_empty(&ring->request_list);
+ list_add_tail(&request->list, &ring->request_list);
if (i915_file_priv) {
list_add_tail(&request->client_list,
&i915_file_priv->mm.request_list);
@@ -1645,7 +1637,7 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
* domain we're flushing with our flush.
*/
if (flush_domains != 0)
- i915_gem_process_flushing_list(dev, flush_domains, seqno);
+ i915_gem_process_flushing_list(dev, flush_domains, seqno, ring);
if (!dev_priv->mm.suspended) {
mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
@@ -1662,18 +1654,16 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
* before signalling the CPU
*/
static uint32_t
-i915_retire_commands(struct drm_device *dev)
+i915_retire_commands(struct drm_device *dev, struct intel_ring_buffer *ring)
{
- uint32_t cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
uint32_t flush_domains = 0;
/* The sampler always gets flushed on i965 (sigh) */
if (IS_I965G(dev))
flush_domains |= I915_GEM_DOMAIN_SAMPLER;
- BEGIN_LP_RING(2);
- OUT_RING(cmd);
- OUT_RING(0); /* noop */
- ADVANCE_LP_RING();
+
+ ring->flush(dev, ring,
+ I915_GEM_DOMAIN_COMMAND, flush_domains);
return flush_domains;
}
@@ -1693,11 +1683,11 @@ i915_gem_retire_request(struct drm_device *dev,
* by the ringbuffer to the flushing/inactive lists as appropriate.
*/
spin_lock(&dev_priv->mm.active_list_lock);
- while (!list_empty(&dev_priv->mm.active_list)) {
+ while (!list_empty(&request->ring->active_list)) {
struct drm_gem_object *obj;
struct drm_i915_gem_object *obj_priv;
- obj_priv = list_first_entry(&dev_priv->mm.active_list,
+ obj_priv = list_first_entry(&request->ring->active_list,
struct drm_i915_gem_object,
list);
obj = obj_priv->obj;
@@ -1706,6 +1696,7 @@ i915_gem_retire_request(struct drm_device *dev,
* list, then the oldest in the list must still be newer than
* this seqno.
*/
+
if (obj_priv->last_rendering_seqno != request->seqno)
goto out;
@@ -1744,39 +1735,37 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
}
uint32_t
-i915_get_gem_seqno(struct drm_device *dev)
+i915_get_gem_seqno(struct drm_device *dev, struct intel_ring_buffer *ring)
{
- drm_i915_private_t *dev_priv = dev->dev_private;
-
- return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX);
+ return ring->get_gem_seqno(dev, ring);
}
/**
* This function clears the request list as sequence numbers are passed.
*/
void
-i915_gem_retire_requests(struct drm_device *dev)
+i915_gem_retire_requests(struct drm_device *dev,
+ struct intel_ring_buffer *ring)
{
drm_i915_private_t *dev_priv = dev->dev_private;
uint32_t seqno;
- if (!dev_priv->render_ring.status_page.page_addr
- || list_empty(&dev_priv->mm.request_list))
+ if (!ring->status_page.page_addr || list_empty(&ring->request_list))
return;
- seqno = i915_get_gem_seqno(dev);
+ seqno = i915_get_gem_seqno(dev, ring);
- while (!list_empty(&dev_priv->mm.request_list)) {
+ while (!list_empty(&ring->request_list)) {
struct drm_i915_gem_request *request;
uint32_t retiring_seqno;
- request = list_first_entry(&dev_priv->mm.request_list,
+ request = list_first_entry(&ring->request_list,
struct drm_i915_gem_request,
list);
retiring_seqno = request->seqno;
if (i915_seqno_passed(seqno, retiring_seqno) ||
- atomic_read(&dev_priv->mm.wedged)) {
+ atomic_read(&dev_priv->mm.wedged)) {
i915_gem_retire_request(dev, request);
list_del(&request->list);
@@ -1788,7 +1777,7 @@ i915_gem_retire_requests(struct drm_device *dev)
if (unlikely (dev_priv->trace_irq_seqno &&
i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) {
- i915_user_irq_put(dev);
+ ring->user_irq_put(dev, ring);
dev_priv->trace_irq_seqno = 0;
}
}
@@ -1804,15 +1793,22 @@ i915_gem_retire_work_handler(struct work_struct *work)
dev = dev_priv->dev;
mutex_lock(&dev->struct_mutex);
- i915_gem_retire_requests(dev);
+ i915_gem_retire_requests(dev, &dev_priv->render_ring);
+
+ if (HAS_BSD(dev))
+ i915_gem_retire_requests(dev, &dev_priv->bsd_ring);
+
if (!dev_priv->mm.suspended &&
- !list_empty(&dev_priv->mm.request_list))
+ (!list_empty(&dev_priv->render_ring.request_list) ||
+ (HAS_BSD(dev) &&
+ !list_empty(&dev_priv->bsd_ring.request_list))))
queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
mutex_unlock(&dev->struct_mutex);
}
int
-i915_do_wait_request(struct drm_device *dev, uint32_t seqno, int interruptible)
+i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
+ int interruptible, struct intel_ring_buffer *ring)
{
drm_i915_private_t *dev_priv = dev->dev_private;
u32 ier;
@@ -1823,7 +1819,7 @@ i915_do_wait_request(struct drm_device *dev, uint32_t seqno, int interruptible)
if (atomic_read(&dev_priv->mm.wedged))
return -EIO;
- if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
+ if (!i915_seqno_passed(ring->get_gem_seqno(dev, ring), seqno)) {
if (HAS_PCH_SPLIT(dev))
ier = I915_READ(DEIER) | I915_READ(GTIER);
else
@@ -1837,19 +1833,21 @@ i915_do_wait_request(struct drm_device *dev, uint32_t seqno, int interruptible)
trace_i915_gem_request_wait_begin(dev, seqno);
- dev_priv->mm.waiting_gem_seqno = seqno;
- i915_user_irq_get(dev);
+ ring->waiting_gem_seqno = seqno;
+ ring->user_irq_get(dev, ring);
if (interruptible)
- ret = wait_event_interruptible(dev_priv->irq_queue,
- i915_seqno_passed(i915_get_gem_seqno(dev), seqno) ||
- atomic_read(&dev_priv->mm.wedged));
+ ret = wait_event_interruptible(ring->irq_queue,
+ i915_seqno_passed(
+ ring->get_gem_seqno(dev, ring), seqno)
+ || atomic_read(&dev_priv->mm.wedged));
else
- wait_event(dev_priv->irq_queue,
- i915_seqno_passed(i915_get_gem_seqno(dev), seqno) ||
- atomic_read(&dev_priv->mm.wedged));
+ wait_event(ring->irq_queue,
+ i915_seqno_passed(
+ ring->get_gem_seqno(dev, ring), seqno)
+ || atomic_read(&dev_priv->mm.wedged));
- i915_user_irq_put(dev);
- dev_priv->mm.waiting_gem_seqno = 0;
+ ring->user_irq_put(dev, ring);
+ ring->waiting_gem_seqno = 0;
trace_i915_gem_request_wait_end(dev, seqno);
}
@@ -1858,7 +1856,7 @@ i915_do_wait_request(struct drm_device *dev, uint32_t seqno, int interruptible)
if (ret && ret != -ERESTARTSYS)
DRM_ERROR("%s returns %d (awaiting %d at %d)\n",
- __func__, ret, seqno, i915_get_gem_seqno(dev));
+ __func__, ret, seqno, ring->get_gem_seqno(dev, ring));
/* Directly dispatch request retiring. While we have the work queue
* to handle this, the waiter on a request often wants an associated
@@ -1866,7 +1864,7 @@ i915_do_wait_request(struct drm_device *dev, uint32_t seqno, int interruptible)
* a separate wait queue to handle that.
*/
if (ret == 0)
- i915_gem_retire_requests(dev);
+ i915_gem_retire_requests(dev, ring);
return ret;
}
@@ -1876,9 +1874,10 @@ i915_do_wait_request(struct drm_device *dev, uint32_t seqno, int interruptible)
* request and object lists appropriately for that event.
*/
static int
-i915_wait_request(struct drm_device *dev, uint32_t seqno)
+i915_wait_request(struct drm_device *dev, uint32_t seqno,
+ struct intel_ring_buffer *ring)
{
- return i915_do_wait_request(dev, seqno, 1);
+ return i915_do_wait_request(dev, seqno, 1, ring);
}
static void
@@ -1896,8 +1895,21 @@ i915_gem_flush(struct drm_device *dev,
if (HAS_BSD(dev))
dev_priv->bsd_ring.flush(dev, &dev_priv->bsd_ring,
- invalidate_domains,
- flush_domains);
+ invalidate_domains,
+ flush_domains);
+}
+
+static void
+i915_gem_flush_ring(struct drm_device *dev,
+ uint32_t invalidate_domains,
+ uint32_t flush_domains,
+ struct intel_ring_buffer *ring)
+{
+ if (flush_domains & I915_GEM_DOMAIN_CPU)
+ drm_agp_chipset_flush(dev);
+ ring->flush(dev, ring,
+ invalidate_domains,
+ flush_domains);
}
/**
@@ -1924,7 +1936,8 @@ i915_gem_object_wait_rendering(struct drm_gem_object *obj)
DRM_INFO("%s: object %p wait for seqno %08x\n",
__func__, obj, obj_priv->last_rendering_seqno);
#endif
- ret = i915_wait_request(dev, obj_priv->last_rendering_seqno);
+ ret = i915_wait_request(dev,
+ obj_priv->last_rendering_seqno, obj_priv->ring);
if (ret != 0)
return ret;
}
@@ -2040,11 +2053,14 @@ i915_gpu_idle(struct drm_device *dev)
{
drm_i915_private_t *dev_priv = dev->dev_private;
bool lists_empty;
- uint32_t seqno;
+ uint32_t seqno1, seqno2;
+ int ret;
spin_lock(&dev_priv->mm.active_list_lock);
lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
- list_empty(&dev_priv->mm.active_list);
+ list_empty(&dev_priv->render_ring.active_list) &&
+ (!HAS_BSD(dev)
+ || list_empty(&dev_priv->bsd_ring.active_list));
spin_unlock(&dev_priv->mm.active_list_lock);
if (lists_empty)
@@ -2052,11 +2068,25 @@ i915_gpu_idle(struct drm_device *dev)
/* Flush everything onto the inactive list. */
i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
- seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS);
- if (seqno == 0)
+ seqno1 = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS,
+ &dev_priv->render_ring);
+ if (seqno1 == 0)
return -ENOMEM;
+ ret = i915_wait_request(dev, seqno1, &dev_priv->render_ring);
+
+ if (HAS_BSD(dev)) {
+ seqno2 = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS,
+ &dev_priv->bsd_ring);
+ if (seqno2 == 0)
+ return -ENOMEM;
+
+ ret = i915_wait_request(dev, seqno2, &dev_priv->bsd_ring);
+ if (ret)
+ return ret;
+ }
+
- return i915_wait_request(dev, seqno);
+ return ret;
}
static int
@@ -2069,7 +2099,9 @@ i915_gem_evict_everything(struct drm_device *dev)
spin_lock(&dev_priv->mm.active_list_lock);
lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
list_empty(&dev_priv->mm.flushing_list) &&
- list_empty(&dev_priv->mm.active_list));
+ list_empty(&dev_priv->render_ring.active_list) &&
+ (!HAS_BSD(dev)
+ || list_empty(&dev_priv->bsd_ring.active_list)));
spin_unlock(&dev_priv->mm.active_list_lock);
if (lists_empty)
@@ -2089,7 +2121,9 @@ i915_gem_evict_everything(struct drm_device *dev)
spin_lock(&dev_priv->mm.active_list_lock);
lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
list_empty(&dev_priv->mm.flushing_list) &&
- list_empty(&dev_priv->mm.active_list));
+ list_empty(&dev_priv->render_ring.active_list) &&
+ (!HAS_BSD(dev)
+ || list_empty(&dev_priv->bsd_ring.active_list)));
spin_unlock(&dev_priv->mm.active_list_lock);
BUG_ON(!lists_empty);
@@ -2103,8 +2137,13 @@ i915_gem_evict_something(struct drm_device *dev, int min_size)
struct drm_gem_object *obj;
int ret;
+ struct intel_ring_buffer *render_ring = &dev_priv->render_ring;
+ struct intel_ring_buffer *bsd_ring = &dev_priv->bsd_ring;
for (;;) {
- i915_gem_retire_requests(dev);
+ i915_gem_retire_requests(dev, render_ring);
+
+ if (HAS_BSD(dev))
+ i915_gem_retire_requests(dev, bsd_ring);
/* If there's an inactive buffer available now, grab it
* and be done.
@@ -2128,14 +2167,30 @@ i915_gem_evict_something(struct drm_device *dev, int min_size)
* things, wait for the next to finish and hopefully leave us
* a buffer to evict.
*/
- if (!list_empty(&dev_priv->mm.request_list)) {
+ if (!list_empty(&render_ring->request_list)) {
+ struct drm_i915_gem_request *request;
+
+ request = list_first_entry(&render_ring->request_list,
+ struct drm_i915_gem_request,
+ list);
+
+ ret = i915_wait_request(dev,
+ request->seqno, request->ring);
+ if (ret)
+ return ret;
+
+ continue;
+ }
+
+ if (HAS_BSD(dev) && !list_empty(&bsd_ring->request_list)) {
struct drm_i915_gem_request *request;
- request = list_first_entry(&dev_priv->mm.request_list,
+ request = list_first_entry(&bsd_ring->request_list,
struct drm_i915_gem_request,
list);
- ret = i915_wait_request(dev, request->seqno);
+ ret = i915_wait_request(dev,
+ request->seqno, request->ring);
if (ret)
return ret;
@@ -2162,10 +2217,13 @@ i915_gem_evict_something(struct drm_device *dev, int min_size)
if (obj != NULL) {
uint32_t seqno;
- i915_gem_flush(dev,
+ i915_gem_flush_ring(dev,
+ obj->write_domain,
obj->write_domain,
- obj->write_domain);
- seqno = i915_add_request(dev, NULL, obj->write_domain);
+ obj_priv->ring);
+ seqno = i915_add_request(dev, NULL,
+ obj->write_domain,
+ obj_priv->ring);
if (seqno == 0)
return -ENOMEM;
continue;
@@ -2691,6 +2749,7 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
{
struct drm_device *dev = obj->dev;
uint32_t old_write_domain;
+ struct drm_i915_gem_object *obj_priv = obj->driver_private;
if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
return;
@@ -2698,7 +2757,7 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
/* Queue the GPU write cache flushing we need. */
old_write_domain = obj->write_domain;
i915_gem_flush(dev, 0, obj->write_domain);
- (void) i915_add_request(dev, NULL, obj->write_domain);
+ (void) i915_add_request(dev, NULL, obj->write_domain, obj_priv->ring);
BUG_ON(obj->write_domain);
trace_i915_gem_object_change_domain(obj,
@@ -2838,7 +2897,10 @@ i915_gem_object_set_to_display_plane(struct drm_gem_object *obj)
DRM_INFO("%s: object %p wait for seqno %08x\n",
__func__, obj, obj_priv->last_rendering_seqno);
#endif
- ret = i915_do_wait_request(dev, obj_priv->last_rendering_seqno, 0);
+ ret = i915_do_wait_request(dev,
+ obj_priv->last_rendering_seqno,
+ 0,
+ obj_priv->ring);
if (ret != 0)
return ret;
}
@@ -3429,61 +3491,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
return 0;
}
-/** Dispatch a batchbuffer to the ring
- */
-static int
-i915_dispatch_gem_execbuffer(struct drm_device *dev,
- struct drm_i915_gem_execbuffer2 *exec,
- struct drm_clip_rect *cliprects,
- uint64_t exec_offset)
-{
- drm_i915_private_t *dev_priv = dev->dev_private;
- int nbox = exec->num_cliprects;
- int i = 0, count;
- uint32_t exec_start, exec_len;
-
- exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
- exec_len = (uint32_t) exec->batch_len;
-
- trace_i915_gem_request_submit(dev, dev_priv->mm.next_gem_seqno + 1);
-
- count = nbox ? nbox : 1;
-
- for (i = 0; i < count; i++) {
- if (i < nbox) {
- int ret = i915_emit_box(dev, cliprects, i,
- exec->DR1, exec->DR4);
- if (ret)
- return ret;
- }
-
- if (IS_I830(dev) || IS_845G(dev)) {
- BEGIN_LP_RING(4);
- OUT_RING(MI_BATCH_BUFFER);
- OUT_RING(exec_start | MI_BATCH_NON_SECURE);
- OUT_RING(exec_start + exec_len - 4);
- OUT_RING(0);
- ADVANCE_LP_RING();
- } else {
- BEGIN_LP_RING(2);
- if (IS_I965G(dev)) {
- OUT_RING(MI_BATCH_BUFFER_START |
- (2 << 6) |
- MI_BATCH_NON_SECURE_I965);
- OUT_RING(exec_start);
- } else {
- OUT_RING(MI_BATCH_BUFFER_START |
- (2 << 6));
- OUT_RING(exec_start | MI_BATCH_NON_SECURE);
- }
- ADVANCE_LP_RING();
- }
- }
-
- /* XXX breadcrumb */
- return 0;
-}
-
/* Throttle our rendering by waiting until the ring has completed our requests
* emitted over 20 msec ago.
*
@@ -3512,7 +3519,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
if (time_after_eq(request->emitted_jiffies, recent_enough))
break;
- ret = i915_wait_request(dev, request->seqno);
+ ret = i915_wait_request(dev, request->seqno, request->ring);
if (ret != 0)
break;
}
@@ -3669,6 +3676,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
uint32_t seqno, flush_domains, reloc_index;
int pin_tries, flips;
+ struct intel_ring_buffer *ring = NULL;
+
#if WATCH_EXEC
DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
(int) args->buffers_ptr, args->buffer_count, args->batch_len);
@@ -3726,6 +3735,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto pre_mutex_err;
}
+ if (args->flags & I915_EXEC_BSD) {
+ BUG_ON(!HAS_BSD(dev));
+ ring = &dev_priv->bsd_ring;
+ } else {
+ ring = &dev_priv->render_ring;
+ }
+
/* Look up object handles */
flips = 0;
for (i = 0; i < args->buffer_count; i++) {
@@ -3859,9 +3875,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
i915_gem_flush(dev,
dev->invalidate_domains,
dev->flush_domains);
- if (dev->flush_domains & I915_GEM_GPU_DOMAINS)
+ if (dev->flush_domains & I915_GEM_GPU_DOMAINS) {
(void)i915_add_request(dev, file_priv,
- dev->flush_domains);
+ dev->flush_domains,
+ &dev_priv->render_ring);
+
+ if (HAS_BSD(dev))
+ (void)i915_add_request(dev, file_priv,
+ dev->flush_domains,
+ &dev_priv->bsd_ring);
+ }
}
for (i = 0; i < args->buffer_count; i++) {
@@ -3898,7 +3921,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
#endif
/* Exec the batchbuffer */
- ret = i915_dispatch_gem_execbuffer(dev, args, cliprects, exec_offset);
+ ret = ring->dispatch_gem_execbuffer(dev, ring, args,
+ cliprects, exec_offset);
if (ret) {
DRM_ERROR("dispatch failed %d\n", ret);
goto err;
@@ -3908,7 +3932,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
* Ensure that the commands in the batch buffer are
* finished before the interrupt fires
*/
- flush_domains = i915_retire_commands(dev);
+ flush_domains = i915_retire_commands(dev, ring);
i915_verify_inactive(dev, __FILE__, __LINE__);
@@ -3919,12 +3943,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
* *some* interrupts representing completion of buffers that we can
* wait on when trying to clear up gtt space).
*/
- seqno = i915_add_request(dev, file_priv, flush_domains);
+ seqno = i915_add_request(dev, file_priv, flush_domains, ring);
BUG_ON(seqno == 0);
for (i = 0; i < args->buffer_count; i++) {
struct drm_gem_object *obj = object_list[i];
+ obj_priv = obj->driver_private;
- i915_gem_object_move_to_active(obj, seqno);
+ i915_gem_object_move_to_active(obj, seqno, ring);
#if WATCH_LRU
DRM_INFO("%s: move to exec list %p\n", __func__, obj);
#endif
@@ -4036,7 +4061,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
exec2.DR4 = args->DR4;
exec2.num_cliprects = args->num_cliprects;
exec2.cliprects_ptr = args->cliprects_ptr;
- exec2.flags = 0;
+ exec2.flags = I915_EXEC_RENDER;
ret = i915_gem_do_execbuffer(dev, data, file_priv, &exec2, exec2_list);
if (!ret) {
@@ -4275,6 +4300,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
struct drm_i915_gem_busy *args = data;
struct drm_gem_object *obj;
struct drm_i915_gem_object *obj_priv;
+ drm_i915_private_t *dev_priv = dev->dev_private;
obj = drm_gem_object_lookup(dev, file_priv, args->handle);
if (obj == NULL) {
@@ -4289,7 +4315,10 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
* actually unmasked, and our working set ends up being larger than
* required.
*/
- i915_gem_retire_requests(dev);
+ i915_gem_retire_requests(dev, &dev_priv->render_ring);
+
+ if (HAS_BSD(dev))
+ i915_gem_retire_requests(dev, &dev_priv->bsd_ring);
obj_priv = to_intel_bo(obj);
/* Don't count being on the flushing list against the object being
@@ -4451,7 +4480,9 @@ i915_gem_idle(struct drm_device *dev)
mutex_lock(&dev->struct_mutex);
if (dev_priv->mm.suspended ||
- dev_priv->render_ring.gem_object == NULL) {
+ (dev_priv->render_ring.gem_object == NULL) ||
+ (HAS_BSD(dev) &&
+ dev_priv->bsd_ring.gem_object == NULL)) {
mutex_unlock(&dev->struct_mutex);
return 0;
}
@@ -4503,10 +4534,11 @@ i915_gem_init_ringbuffer(struct drm_device *dev)
0, PAGE_SIZE);
}
ret = intel_init_ring_buffer(dev, &dev_priv->render_ring);
- if (!ret && HAS_BSD(dev)) {
+ if (HAS_BSD(dev)) {
dev_priv->bsd_ring = bsd_ring;
ret = intel_init_ring_buffer(dev, &dev_priv->bsd_ring);
}
+
return ret;
}
@@ -4514,10 +4546,10 @@ void
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
{
drm_i915_private_t *dev_priv = dev->dev_private;
+
intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
if (HAS_BSD(dev))
intel_cleanup_ring_buffer(dev, &dev_priv->bsd_ring);
-
}
int
@@ -4545,12 +4577,14 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
}
spin_lock(&dev_priv->mm.active_list_lock);
- BUG_ON(!list_empty(&dev_priv->mm.active_list));
+ BUG_ON(!list_empty(&dev_priv->render_ring.active_list));
+ BUG_ON(HAS_BSD(dev) && !list_empty(&dev_priv->bsd_ring.active_list));
spin_unlock(&dev_priv->mm.active_list_lock);
BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
- BUG_ON(!list_empty(&dev_priv->mm.request_list));
+ BUG_ON(!list_empty(&dev_priv->render_ring.request_list));
+ BUG_ON(HAS_BSD(dev) && !list_empty(&dev_priv->bsd_ring.request_list));
mutex_unlock(&dev->struct_mutex);
drm_irq_install(dev);
@@ -4589,15 +4623,20 @@ i915_gem_load(struct drm_device *dev)
drm_i915_private_t *dev_priv = dev->dev_private;
spin_lock_init(&dev_priv->mm.active_list_lock);
- INIT_LIST_HEAD(&dev_priv->mm.active_list);
INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
INIT_LIST_HEAD(&dev_priv->mm.gpu_write_list);
INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
- INIT_LIST_HEAD(&dev_priv->mm.request_list);
INIT_LIST_HEAD(&dev_priv->mm.fence_list);
+ INIT_LIST_HEAD(&dev_priv->render_ring.active_list);
+ INIT_LIST_HEAD(&dev_priv->render_ring.request_list);
+
+ if (HAS_BSD(dev)) {
+ INIT_LIST_HEAD(&dev_priv->bsd_ring.active_list);
+ INIT_LIST_HEAD(&dev_priv->bsd_ring.request_list);
+ }
+
INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
i915_gem_retire_work_handler);
- dev_priv->mm.next_gem_seqno = 1;
spin_lock(&shrink_list_lock);
list_add(&dev_priv->mm.shrink_list, &shrink_list);
@@ -4860,8 +4899,10 @@ i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask)
continue;
spin_unlock(&shrink_list_lock);
+ i915_gem_retire_requests(dev, &dev_priv->render_ring);
- i915_gem_retire_requests(dev);
+ if (HAS_BSD(dev))
+ i915_gem_retire_requests(dev, &dev_priv->bsd_ring);
list_for_each_entry_safe(obj_priv, next_obj,
&dev_priv->mm.inactive_list,
diff --git a/drivers/gpu/drm/i915/i915_gem_debug.c b/drivers/gpu/drm/i915/i915_gem_debug.c
index 35507cf..830680b 100644
--- a/drivers/gpu/drm/i915/i915_gem_debug.c
+++ b/drivers/gpu/drm/i915/i915_gem_debug.c
@@ -106,12 +106,21 @@ i915_dump_lru(struct drm_device *dev, const char *where)
DRM_INFO("active list %s {\n", where);
spin_lock(&dev_priv->mm.active_list_lock);
- list_for_each_entry(obj_priv, &dev_priv->mm.active_list,
+ list_for_each_entry(obj_priv, &dev_priv->render_ring.active_list,
list)
{
DRM_INFO(" %p: %08x\n", obj_priv,
obj_priv->last_rendering_seqno);
}
+
+ if (HAS_BSD(dev)) {
+ list_for_each_entry(obj_priv, &dev_priv->bsd_ring.active_list,
+ list)
+ {
+ DRM_INFO(" %p: %08x\n", obj_priv,
+ obj_priv->last_rendering_seqno);
+ }
+ }
spin_unlock(&dev_priv->mm.active_list_lock);
DRM_INFO("}\n");
DRM_INFO("flushing list %s {\n", where);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 1e3299a..5312571 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -52,7 +52,7 @@
I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
/** Interrupts that we mask and unmask at runtime. */
-#define I915_INTERRUPT_ENABLE_VAR (I915_USER_INTERRUPT)
+#define I915_INTERRUPT_ENABLE_VAR (I915_USER_INTERRUPT | I915_BSD_USER_INTERRUPT)
#define I915_PIPE_VBLANK_STATUS (PIPE_START_VBLANK_INTERRUPT_STATUS |\
PIPE_VBLANK_INTERRUPT_STATUS)
@@ -330,7 +330,7 @@ irqreturn_t ironlake_irq_handler(struct drm_device *dev)
int ret = IRQ_NONE;
u32 de_iir, gt_iir, de_ier, pch_iir;
struct drm_i915_master_private *master_priv;
-
+ struct intel_ring_buffer *render_ring = &dev_priv->render_ring;
/* disable master interrupt before clearing iir */
de_ier = I915_READ(DEIER);
I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL);
@@ -353,14 +353,17 @@ irqreturn_t ironlake_irq_handler(struct drm_device *dev)
}
if (gt_iir & GT_USER_INTERRUPT) {
- u32 seqno = i915_get_gem_seqno(dev);
- dev_priv->mm.irq_gem_seqno = seqno;
+ u32 seqno = render_ring->get_gem_seqno(dev, render_ring);
+ render_ring->irq_gem_seqno = seqno;
trace_i915_gem_request_complete(dev, seqno);
- DRM_WAKEUP(&dev_priv->irq_queue);
+ DRM_WAKEUP(&dev_priv->render_ring.irq_queue);
dev_priv->hangcheck_count = 0;
mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
}
+ if (gt_iir & GT_BSD_USER_INTERRUPT)
+ DRM_WAKEUP(&dev_priv->bsd_ring.irq_queue);
+
if (de_iir & DE_GSE)
ironlake_opregion_gse_intr(dev);
@@ -552,7 +555,6 @@ i915_ringbuffer_last_batch(struct drm_device *dev)
return bbaddr;
}
-
/**
* i915_capture_error_state - capture an error record for later analysis
* @dev: drm device
@@ -584,7 +586,7 @@ static void i915_capture_error_state(struct drm_device *dev)
return;
}
- error->seqno = i915_get_gem_seqno(dev);
+ error->seqno = i915_get_gem_seqno(dev, &dev_priv->render_ring);
error->eir = I915_READ(EIR);
error->pgtbl_er = I915_READ(PGTBL_ER);
error->pipeastat = I915_READ(PIPEASTAT);
@@ -612,7 +614,8 @@ static void i915_capture_error_state(struct drm_device *dev)
batchbuffer[0] = NULL;
batchbuffer[1] = NULL;
count = 0;
- list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) {
+ list_for_each_entry(obj_priv,
+ &dev_priv->render_ring.active_list, list) {
struct drm_gem_object *obj = obj_priv->obj;
if (batchbuffer[0] == NULL &&
@@ -649,7 +652,8 @@ static void i915_capture_error_state(struct drm_device *dev)
if (error->active_bo) {
int i = 0;
- list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) {
+ list_for_each_entry(obj_priv,
+ &dev_priv->render_ring.active_list, list) {
struct drm_gem_object *obj = obj_priv->obj;
error->active_bo[i].size = obj->size;
@@ -827,7 +831,7 @@ static void i915_handle_error(struct drm_device *dev, bool wedged)
/*
* Wakeup waiting processes so they don't hang
*/
- DRM_WAKEUP(&dev_priv->irq_queue);
+ DRM_WAKEUP(&dev_priv->render_ring.irq_queue);
}
queue_work(dev_priv->wq, &dev_priv->error_work);
@@ -846,6 +850,7 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
unsigned long irqflags;
int irq_received;
int ret = IRQ_NONE;
+ struct intel_ring_buffer *render_ring = &dev_priv->render_ring;
atomic_inc(&dev_priv->irq_received);
@@ -926,14 +931,18 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
}
if (iir & I915_USER_INTERRUPT) {
- u32 seqno = i915_get_gem_seqno(dev);
- dev_priv->mm.irq_gem_seqno = seqno;
+ u32 seqno =
+ render_ring->get_gem_seqno(dev, render_ring);
+ render_ring->irq_gem_seqno = seqno;
trace_i915_gem_request_complete(dev, seqno);
- DRM_WAKEUP(&dev_priv->irq_queue);
+ DRM_WAKEUP(&dev_priv->render_ring.irq_queue);
dev_priv->hangcheck_count = 0;
mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
}
+ if (HAS_BSD(dev) && (iir & I915_BSD_USER_INTERRUPT))
+ DRM_WAKEUP(&dev_priv->bsd_ring.irq_queue);
+
if (iir & I915_DISPLAY_PLANE_A_FLIP_PENDING_INTERRUPT)
intel_prepare_page_flip(dev, 0);
@@ -982,9 +991,7 @@ static int i915_emit_irq(struct drm_device * dev)
{
drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
-
i915_kernel_lost_context(dev);
-
DRM_DEBUG_DRIVER("\n");
dev_priv->counter++;
@@ -1003,43 +1010,13 @@ static int i915_emit_irq(struct drm_device * dev)
return dev_priv->counter;
}
-void i915_user_irq_get(struct drm_device *dev)
-{
- drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
- unsigned long irqflags;
-
- spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags);
- if (dev->irq_enabled && (++dev_priv->user_irq_refcount == 1)) {
- if (HAS_PCH_SPLIT(dev))
- ironlake_enable_graphics_irq(dev_priv, GT_USER_INTERRUPT);
- else
- i915_enable_irq(dev_priv, I915_USER_INTERRUPT);
- }
- spin_unlock_irqrestore(&dev_priv->user_irq_lock, irqflags);
-}
-
-void i915_user_irq_put(struct drm_device *dev)
-{
- drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
- unsigned long irqflags;
-
- spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags);
- BUG_ON(dev->irq_enabled && dev_priv->user_irq_refcount <= 0);
- if (dev->irq_enabled && (--dev_priv->user_irq_refcount == 0)) {
- if (HAS_PCH_SPLIT(dev))
- ironlake_disable_graphics_irq(dev_priv, GT_USER_INTERRUPT);
- else
- i915_disable_irq(dev_priv, I915_USER_INTERRUPT);
- }
- spin_unlock_irqrestore(&dev_priv->user_irq_lock, irqflags);
-}
-
void i915_trace_irq_get(struct drm_device *dev, u32 seqno)
{
drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+ struct intel_ring_buffer *render_ring = &dev_priv->render_ring;
if (dev_priv->trace_irq_seqno == 0)
- i915_user_irq_get(dev);
+ render_ring->user_irq_get(dev, render_ring);
dev_priv->trace_irq_seqno = seqno;
}
@@ -1049,6 +1026,7 @@ static int i915_wait_irq(struct drm_device * dev, int irq_nr)
drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
int ret = 0;
+ struct intel_ring_buffer *render_ring = &dev_priv->render_ring;
DRM_DEBUG_DRIVER("irq_nr=%d breadcrumb=%d\n", irq_nr,
READ_BREADCRUMB(dev_priv));
@@ -1062,10 +1040,10 @@ static int i915_wait_irq(struct drm_device * dev, int irq_nr)
if (master_priv->sarea_priv)
master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
- i915_user_irq_get(dev);
- DRM_WAIT_ON(ret, dev_priv->irq_queue, 3 * DRM_HZ,
+ render_ring->user_irq_get(dev, render_ring);
+ DRM_WAIT_ON(ret, dev_priv->render_ring.irq_queue, 3 * DRM_HZ,
READ_BREADCRUMB(dev_priv) >= irq_nr);
- i915_user_irq_put(dev);
+ render_ring->user_irq_put(dev, render_ring);
if (ret == -EBUSY) {
DRM_ERROR("EBUSY -- rec: %d emitted: %d\n",
@@ -1230,9 +1208,12 @@ int i915_vblank_swap(struct drm_device *dev, void *data,
return -EINVAL;
}
-struct drm_i915_gem_request *i915_get_tail_request(struct drm_device *dev) {
+struct drm_i915_gem_request *
+i915_get_tail_request(struct drm_device *dev)
+{
drm_i915_private_t *dev_priv = dev->dev_private;
- return list_entry(dev_priv->mm.request_list.prev, struct drm_i915_gem_request, list);
+ return list_entry(dev_priv->render_ring.request_list.prev,
+ struct drm_i915_gem_request, list);
}
/**
@@ -1257,8 +1238,10 @@ void i915_hangcheck_elapsed(unsigned long data)
acthd = I915_READ(ACTHD_I965);
/* If all work is done then ACTHD clearly hasn't advanced. */
- if (list_empty(&dev_priv->mm.request_list) ||
- i915_seqno_passed(i915_get_gem_seqno(dev), i915_get_tail_request(dev)->seqno)) {
+ if (list_empty(&dev_priv->render_ring.request_list) ||
+ i915_seqno_passed(i915_get_gem_seqno(dev,
+ &dev_priv->render_ring),
+ i915_get_tail_request(dev)->seqno)) {
dev_priv->hangcheck_count = 0;
return;
}
@@ -1311,7 +1294,7 @@ static int ironlake_irq_postinstall(struct drm_device *dev)
/* enable kind of interrupts always enabled */
u32 display_mask = DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT |
DE_PLANEA_FLIP_DONE | DE_PLANEB_FLIP_DONE;
- u32 render_mask = GT_USER_INTERRUPT;
+ u32 render_mask = GT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT;
u32 hotplug_mask = SDE_CRT_HOTPLUG | SDE_PORTB_HOTPLUG |
SDE_PORTC_HOTPLUG | SDE_PORTD_HOTPLUG;
@@ -1388,7 +1371,10 @@ int i915_driver_irq_postinstall(struct drm_device *dev)
u32 enable_mask = I915_INTERRUPT_ENABLE_FIX | I915_INTERRUPT_ENABLE_VAR;
u32 error_mask;
- DRM_INIT_WAITQUEUE(&dev_priv->irq_queue);
+ DRM_INIT_WAITQUEUE(&dev_priv->render_ring.irq_queue);
+
+ if (HAS_BSD(dev))
+ DRM_INIT_WAITQUEUE(&dev_priv->bsd_ring.irq_queue);
dev_priv->vblank_pipe = DRM_I915_VBLANK_PIPE_A | DRM_I915_VBLANK_PIPE_B;
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 2e490ad..d7ed6bc 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -212,6 +212,8 @@ static int intel_overlay_on(struct intel_overlay *overlay)
{
struct drm_device *dev = overlay->dev;
int ret;
+ drm_i915_private_t *dev_priv = dev->dev_private;
+
BUG_ON(overlay->active);
overlay->active = 1;
@@ -224,11 +226,13 @@ static int intel_overlay_on(struct intel_overlay *overlay)
OUT_RING(MI_NOOP);
ADVANCE_LP_RING();
- overlay->last_flip_req = i915_add_request(dev, NULL, 0);
+ overlay->last_flip_req =
+ i915_add_request(dev, NULL, 0, &dev_priv->render_ring);
if (overlay->last_flip_req == 0)
return -ENOMEM;
- ret = i915_do_wait_request(dev, overlay->last_flip_req, 1);
+ ret = i915_do_wait_request(dev,
+ overlay->last_flip_req, 1, &dev_priv->render_ring);
if (ret != 0)
return ret;
@@ -261,7 +265,8 @@ static void intel_overlay_continue(struct intel_overlay *overlay,
OUT_RING(flip_addr);
ADVANCE_LP_RING();
- overlay->last_flip_req = i915_add_request(dev, NULL, 0);
+ overlay->last_flip_req =
+ i915_add_request(dev, NULL, 0, &dev_priv->render_ring);
}
static int intel_overlay_wait_flip(struct intel_overlay *overlay)
@@ -272,7 +277,8 @@ static int intel_overlay_wait_flip(struct intel_overlay *overlay)
u32 tmp;
if (overlay->last_flip_req != 0) {
- ret = i915_do_wait_request(dev, overlay->last_flip_req, 1);
+ ret = i915_do_wait_request(dev, overlay->last_flip_req,
+ 1, &dev_priv->render_ring);
if (ret == 0) {
overlay->last_flip_req = 0;
@@ -291,11 +297,13 @@ static int intel_overlay_wait_flip(struct intel_overlay *overlay)
OUT_RING(MI_NOOP);
ADVANCE_LP_RING();
- overlay->last_flip_req = i915_add_request(dev, NULL, 0);
+ overlay->last_flip_req =
+ i915_add_request(dev, NULL, 0, &dev_priv->render_ring);
if (overlay->last_flip_req == 0)
return -ENOMEM;
- ret = i915_do_wait_request(dev, overlay->last_flip_req, 1);
+ ret = i915_do_wait_request(dev, overlay->last_flip_req,
+ 1, &dev_priv->render_ring);
if (ret != 0)
return ret;
@@ -309,6 +317,7 @@ static int intel_overlay_off(struct intel_overlay *overlay)
{
u32 flip_addr = overlay->flip_addr;
struct drm_device *dev = overlay->dev;
+ drm_i915_private_t *dev_priv = dev->dev_private;
int ret;
BUG_ON(!overlay->active);
@@ -329,11 +338,13 @@ static int intel_overlay_off(struct intel_overlay *overlay)
OUT_RING(MI_NOOP);
ADVANCE_LP_RING();
- overlay->last_flip_req = i915_add_request(dev, NULL, 0);
+ overlay->last_flip_req =
+ i915_add_request(dev, NULL, 0, &dev_priv->render_ring);
if (overlay->last_flip_req == 0)
return -ENOMEM;
- ret = i915_do_wait_request(dev, overlay->last_flip_req, 1);
+ ret = i915_do_wait_request(dev, overlay->last_flip_req,
+ 1, &dev_priv->render_ring);
if (ret != 0)
return ret;
@@ -347,11 +358,13 @@ static int intel_overlay_off(struct intel_overlay *overlay)
OUT_RING(MI_NOOP);
ADVANCE_LP_RING();
- overlay->last_flip_req = i915_add_request(dev, NULL, 0);
+ overlay->last_flip_req =
+ i915_add_request(dev, NULL, 0, &dev_priv->render_ring);
if (overlay->last_flip_req == 0)
return -ENOMEM;
- ret = i915_do_wait_request(dev, overlay->last_flip_req, 1);
+ ret = i915_do_wait_request(dev, overlay->last_flip_req,
+ 1, &dev_priv->render_ring);
if (ret != 0)
return ret;
@@ -384,6 +397,7 @@ int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay,
{
struct drm_device *dev = overlay->dev;
struct drm_gem_object *obj;
+ drm_i915_private_t *dev_priv = dev->dev_private;
u32 flip_addr;
int ret;
@@ -391,12 +405,14 @@ int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay,
return -EIO;
if (overlay->last_flip_req == 0) {
- overlay->last_flip_req = i915_add_request(dev, NULL, 0);
+ overlay->last_flip_req =
+ i915_add_request(dev, NULL, 0, &dev_priv->render_ring);
if (overlay->last_flip_req == 0)
return -ENOMEM;
}
- ret = i915_do_wait_request(dev, overlay->last_flip_req, interruptible);
+ ret = i915_do_wait_request(dev, overlay->last_flip_req,
+ interruptible, &dev_priv->render_ring);
if (ret != 0)
return ret;
@@ -420,12 +436,13 @@ int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay,
OUT_RING(MI_NOOP);
ADVANCE_LP_RING();
- overlay->last_flip_req = i915_add_request(dev, NULL, 0);
+ overlay->last_flip_req = i915_add_request(dev, NULL,
+ 0, &dev_priv->render_ring);
if (overlay->last_flip_req == 0)
return -ENOMEM;
ret = i915_do_wait_request(dev, overlay->last_flip_req,
- interruptible);
+ interruptible, &dev_priv->render_ring);
if (ret != 0)
return ret;
--
1.7.1
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH 1/4] introduce intel_ring_buffer structure
2010-05-05 18:23 ` Daniel Vetter
@ 2010-05-06 2:25 ` Zou, Nanhai
0 siblings, 0 replies; 24+ messages in thread
From: Zou, Nanhai @ 2010-05-06 2:25 UTC (permalink / raw)
To: Daniel Vetter; +Cc: Intel GFX, Anholt, Eric, Anholt@freedesktop.org
I have resent the patches.
Please help review.
Thanks
Zou Nan hai
-----Original Message-----
From: Daniel Vetter [mailto:daniel@ffwll.ch]
Sent: 2010年5月6日 2:23
To: Zou, Nanhai
Cc: Anholt@freedesktop.org; Anholt, Eric; Intel GFX
Subject: Re: [Intel-gfx] [PATCH 1/4] introduce intel_ring_buffer structure
Looks like you've forgotten to actually add intel_ringbuffer.c - it's
missing form the patch ;)
Please resend.
Cheers, Daniel
--
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/4] introduce intel_ring_buffer structure
2010-05-06 1:20 [PATCH 1/4] introduce intel_ring_buffer structure Zou Nan hai
` (2 preceding siblings ...)
2010-05-06 1:20 ` [PATCH 4/4] adapt intel_ring_buffer into gem Zou Nan hai
@ 2010-05-07 21:34 ` Eric Anholt
2010-05-08 18:15 ` Thomas Bächler
2010-05-11 22:24 ` Daniel Vetter
4 siblings, 1 reply; 24+ messages in thread
From: Eric Anholt @ 2010-05-07 21:34 UTC (permalink / raw)
To: Zou Nan hai, Intel GFX
[-- Attachment #1.1: Type: text/plain, Size: 6664 bytes --]
On Thu, 6 May 2010 09:20:02 +0800, Zou Nan hai <nanhai.zou@intel.com> wrote:
> introduce intel_ring_buffer structure.
> sequential number, IRQ logic and hardware status page
> were included in the intel_ring_buffer structure
I tried to go apply this patch series once again. The first one
actually applies now. The second produces massive rejects, whether I apply
against linus master or drm-intel-next.
Please rebase against drm-intel-next, since that's where it will land.
I've gone ahead and merged master to -next now so that the final rebase
of this patch series (I sure hope) can be well-tested on that branch
with the PIPE_CONTROL fix.
And while you're fixing up the rebase to make sure it all works, a
couple of fixes:
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> new file mode 100644
> index 0000000..65c540b
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -0,0 +1,576 @@
> +/*
> + * Copyright © 20010 Intel Corporation
It's 2010.
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + * Zou Nan hai <nanhai.zou@intel.com>
> + * Xiang Hai hao<haihao.xiang@intel.com>
> + *
> + */
> +#include "drmP.h"
> +#include "drm.h"
> +#include "i915_drv.h"
> +#include "i915_drm.h"
> +#include "i915_trace.h"
> +
> +#define I915_GEM_GPU_DOMAINS (~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
Common definitions should go in header files (i915_drv.h here).
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> new file mode 100644
> index 0000000..e3b40d1
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -0,0 +1,119 @@
> +#ifndef _INTEL_RINGBUFFER_H_
> +#define _INTEL_RINGBUFFER_H_
> +
> +struct intel_hw_status_page {
> + void *page_addr;
> + unsigned int gfx_addr;
> + struct drm_gem_object *obj;
> +};
> +
> +struct drm_i915_gem_execbuffer2;
> +struct intel_ring_buffer {
> + const char *name;
> + unsigned int ring_flag;
> + unsigned long size;
> + unsigned int alignment;
> + void *virtual_start;
> + struct drm_device *dev;
> + struct drm_gem_object *gem_object;
> +
> + unsigned int head;
> + unsigned int tail;
> + unsigned int space;
> + u32 next_seqno;
> + struct intel_hw_status_page status_page;
> +
> + u32 irq_gem_seqno; /* last seq seem at irq time */
> + u32 waiting_gem_seqno;
> + int user_irq_refcount;
> + void (*user_irq_get)(struct drm_device *dev,
> + struct intel_ring_buffer *ring);
> + void (*user_irq_put)(struct drm_device *dev,
> + struct intel_ring_buffer *ring);
> + void (*setup_status_page)(struct drm_device *dev,
> + struct intel_ring_buffer *ring);
> +
> + int (*init)(struct drm_device *dev,
> + struct intel_ring_buffer *ring);
> +
> + unsigned int (*get_head)(struct drm_device *dev,
> + struct intel_ring_buffer *ring);
> + unsigned int (*get_tail)(struct drm_device *dev,
> + struct intel_ring_buffer *ring);
> + unsigned int (*get_active_head)(struct drm_device *dev,
> + struct intel_ring_buffer *ring);
> + void (*advance_ring)(struct drm_device *dev,
> + struct intel_ring_buffer *ring);
> + void (*flush)(struct drm_device *dev,
> + struct intel_ring_buffer *ring,
> + u32 invalidate_domains,
> + u32 flush_domains);
> + u32 (*add_request)(struct drm_device *dev,
> + struct intel_ring_buffer *ring,
> + struct drm_file *file_priv,
> + u32 flush_domains);
> + u32 (*get_gem_seqno)(struct drm_device *dev,
> + struct intel_ring_buffer *ring);
> + int (*dispatch_gem_execbuffer)(struct drm_device *dev,
> + struct intel_ring_buffer *ring,
> + struct drm_i915_gem_execbuffer2 *exec,
> + struct drm_clip_rect *cliprects,
> + uint64_t exec_offset);
> +
> + /**
> + * List of objects currently involved in rendering from the
> + * ringbuffer.
> + *
> + * Includes buffers having the contents of their GPU caches
> + * flushed, not necessarily primitives. last_rendering_seqno
> + * represents when the rendering involved will be completed.
> + *
> + * A reference is held on the buffer while on this list.
> + */
> + struct list_head active_list;
> +
> + /**
> + * List of breadcrumbs associated with GPU requests currently
> + * outstanding.
> + */
> + struct list_head request_list;
> +
> + wait_queue_head_t irq_queue;
> + drm_local_map_t map;
> +};
> +
> +static inline u32
> +intel_read_status_page(struct intel_ring_buffer *ring,
> + int reg)
> +{
> + u32 *regs = ring->status_page.page_addr;
> + return regs[reg];
> +}
> +
> +int intel_init_ring_buffer(struct drm_device *dev,
> + struct intel_ring_buffer *ring);
> +void intel_cleanup_ring_buffer(struct drm_device *dev,
> + struct intel_ring_buffer *ring);
> +int intel_wait_ring_buffer(struct drm_device *dev,
> + struct intel_ring_buffer *ring, int n);
> +int intel_wrap_ring_buffer(struct drm_device *dev,
> + struct intel_ring_buffer *ring);
> +void intel_ring_begin(struct drm_device *dev,
> + struct intel_ring_buffer *ring, int n);
> +void intel_ring_emit(struct drm_device *dev,
> + struct intel_ring_buffer *ring, u32 data);
> +void intel_fill_struct(struct drm_device *dev,
> + struct intel_ring_buffer *ring,
> + void *data,
> + unsigned int len);
> +void intel_ring_advance(struct drm_device *dev,
> + struct intel_ring_buffer *ring);
Lots of trailing whitespace in this hunk.
[-- Attachment #1.2: Type: application/pgp-signature, Size: 197 bytes --]
[-- Attachment #2: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/4] introduce intel_ring_buffer structure
2010-05-07 21:34 ` [PATCH 1/4] introduce intel_ring_buffer structure Eric Anholt
@ 2010-05-08 18:15 ` Thomas Bächler
0 siblings, 0 replies; 24+ messages in thread
From: Thomas Bächler @ 2010-05-08 18:15 UTC (permalink / raw)
To: Eric Anholt; +Cc: Intel GFX
[-- Attachment #1.1: Type: text/plain, Size: 796 bytes --]
Am 07.05.2010 23:34, schrieb Eric Anholt:
> On Thu, 6 May 2010 09:20:02 +0800, Zou Nan hai <nanhai.zou@intel.com> wrote:
>> introduce intel_ring_buffer structure.
>> sequential number, IRQ logic and hardware status page
>> were included in the intel_ring_buffer structure
>
> I tried to go apply this patch series once again. The first one
> actually applies now. The second produces massive rejects, whether I apply
> against linus master or drm-intel-next.
>
> Please rebase against drm-intel-next, since that's where it will land.
I'm glad I'm not the only one. I wanted to test the h264 decoding and
actually managed to apply an old version of the first patch - but I also
got the rejects on the second, thought it was my fault though, for using
the wrong tree or so.
[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 262 bytes --]
[-- Attachment #2: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/4] introduce intel_ring_buffer structure
2010-05-06 1:20 [PATCH 1/4] introduce intel_ring_buffer structure Zou Nan hai
` (3 preceding siblings ...)
2010-05-07 21:34 ` [PATCH 1/4] introduce intel_ring_buffer structure Eric Anholt
@ 2010-05-11 22:24 ` Daniel Vetter
2010-05-14 1:39 ` Zou, Nanhai
4 siblings, 1 reply; 24+ messages in thread
From: Daniel Vetter @ 2010-05-11 22:24 UTC (permalink / raw)
To: Zou Nan hai; +Cc: Intel GFX, Anholt Eric
Hi all,
Ok, here's my try at a review. Sorry for the rather long delay.
First things first, please put on your asbestos suits, this one's gonna be
rough;) Second: Your patch is still a pain to review for the following
reasons:
- IMHO you split-up made things worse: The completely rewritten render
ring implementation is introduced in patch 1, but only really fully used
when all 4 patches are applied. Now instead of hunting around in one
monster patch (which was hard) I have to hunt around in four different
patches to make sense of changes (rather impossible). Similarly other
stuff is splattered all over the series, e.g. I've found definitions for
the bsd ring in patch 1 instead of patch 3.
- Your doing tons of (at least at first glance) superfluously
search&replaces. Yep, inconsistent naming is ugly, but patches bloated
with tons of such changes are even uglier. Some examples:
s/ring/render_ring/
s/drm_i915_ring_buffer_t/intel_ring_buffer/
There are more. If you think these changes really are beneficial, please
do one patch per s&r with nothing else in it.
Now for the more specific issues:
- In my previous review I've proposed to replace the gpu execution
breadcrumb with something like this
struct intel_gpu_breadcrumb {
uint32_t seqno : 30;
uint32_t domain : 2;
};
Compared to your approach this doesn't waste a pointer per gem object.
Further it can be used for execution domains like keeping track of
pageflips (tossed around some ideas with Kristian on irc) or to batch up
gtt chipset flushes (probably needed to make my i855 cache coherency stuff
fast again). So yes, I'd like to see this.
- struct intel_ring_buffer looks massively overdesigned in your patch. I
think a more lightweight approach that leaves as much as possible of the
render ring implementation untouched (and doesn't move it around) is
better for at least two reasons:
- Your abstraction requires quite a few automatic changes all over. This
needlessly bloats the patch and makes finding the interesting stuff
harder.
- Greatly reduced risk of introducing regressions.
There are also a few places that look rather fishy. Mostly I simply
couldn't follow anymore what's going on due to the reasons laid out above.
Anyway, here they are:
- You (seem to) move around kernel_lost_context. For a feature that's
currently only supported for hw that was never supported with ums, this
is either fishy or needs a big comment.
- In i915_gem_flush you simply flush both ringbuffers. Now either these
two have independent caches (and only one needs to be flushed, gem is
not supposed to do unnecessary flushes) or they are coherent and only
one flush is required, period. Current code looks like it tries to paper
over some coherency issues (and I've learned to loathe these buggers).
Related to this: The active list gets split up between the to
ringbuffers. The flushing list is still global. This looks inconsistent.
- I haven't found out how synchronization between the bsd and the render
ringbuffer is handled. And if the answer is "userspace takes care" I'm
not gonna like it.
With this off the table, I still have to do some venting ;) The idea
behind splitting patches up is:
- to make life easier for reviewers. IHMO you've failed in that regard.
- to make bisecting regressions easy. Given that I've found gimmicks like
tatus_page_dmah->vaddr;
- memset(dev_priv->render_ring.status_page.page_addr
+ memset(dev_priv->render_ring.status_page.page_addr,
0, PAGE_SIZE);
}
in your patches (note the re-added semicolon at the end) your patches
definitely fail at that - they won't even compile in between. And it
looks like other people have problems simply applying the patches, too.
So please take a tad bit more care when prepping patch series. Otherwise
the hours (including thinking time) I've just put into this review feel
kinda wasted.
For a _really_ nice patch series that was a joy to read, look no further
than at Zhenyu Wang's recent connector rework (the patch series that got
merged, _not_ the first submission - but that can serve as comparison).
Oh, one last thing: Given that libIntelXvMC seems to support vld (for
mpeg2), why exactly do we need this?
Yours, Daniel
--
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/4] introduce intel_ring_buffer structure
2010-05-11 22:24 ` Daniel Vetter
@ 2010-05-14 1:39 ` Zou, Nanhai
2010-05-14 9:51 ` Daniel Vetter
2010-05-14 17:43 ` Owain Ainsworth
0 siblings, 2 replies; 24+ messages in thread
From: Zou, Nanhai @ 2010-05-14 1:39 UTC (permalink / raw)
To: Daniel Vetter; +Cc: Intel GFX, Anholt, Eric
Hi,
Thanks for reviewing the patch.
To clarify,
This patch is used for H.264/VC1 decoding.
Abstract ring buffer is also needed for our later generation GPUs,
Because there are more types of ring buffer to come.
H.264/VC1 HW decoding is a bigger requirement than mepg2.
XvMC VLD can only support mpeg2. It is rendering in server context
and lack of post processing features.
We are not going to support it later, our later media work will be
based on vaapi.
Synchronize between BSD and render ring will be done by VAAPI client.
We have run Tons of video validation upon this patch.
Our QA will do 1-2 days of regression test on different chips each time I rebase and sent out the patch
Let's get simple code works first then we can optimize it.
Thanks
Zou Nan hai
-----Original Message-----
From: Daniel Vetter [mailto:daniel@ffwll.ch]
Sent: 2010年5月12日 6:24
To: Zou, Nanhai
Cc: Anholt, Eric; Intel GFX
Subject: Re: [Intel-gfx] [PATCH 1/4] introduce intel_ring_buffer structure
Hi all,
Ok, here's my try at a review. Sorry for the rather long delay.
First things first, please put on your asbestos suits, this one's gonna be
rough;) Second: Your patch is still a pain to review for the following
reasons:
- IMHO you split-up made things worse: The completely rewritten render
ring implementation is introduced in patch 1, but only really fully used
when all 4 patches are applied. Now instead of hunting around in one
monster patch (which was hard) I have to hunt around in four different
patches to make sense of changes (rather impossible). Similarly other
stuff is splattered all over the series, e.g. I've found definitions for
the bsd ring in patch 1 instead of patch 3.
- Your doing tons of (at least at first glance) superfluously
search&replaces. Yep, inconsistent naming is ugly, but patches bloated
with tons of such changes are even uglier. Some examples:
s/ring/render_ring/
s/drm_i915_ring_buffer_t/intel_ring_buffer/
There are more. If you think these changes really are beneficial, please
do one patch per s&r with nothing else in it.
Now for the more specific issues:
- In my previous review I've proposed to replace the gpu execution
breadcrumb with something like this
struct intel_gpu_breadcrumb {
uint32_t seqno : 30;
uint32_t domain : 2;
};
Compared to your approach this doesn't waste a pointer per gem object.
Further it can be used for execution domains like keeping track of
pageflips (tossed around some ideas with Kristian on irc) or to batch up
gtt chipset flushes (probably needed to make my i855 cache coherency stuff
fast again). So yes, I'd like to see this.
- struct intel_ring_buffer looks massively overdesigned in your patch. I
think a more lightweight approach that leaves as much as possible of the
render ring implementation untouched (and doesn't move it around) is
better for at least two reasons:
- Your abstraction requires quite a few automatic changes all over. This
needlessly bloats the patch and makes finding the interesting stuff
harder.
- Greatly reduced risk of introducing regressions.
There are also a few places that look rather fishy. Mostly I simply
couldn't follow anymore what's going on due to the reasons laid out above.
Anyway, here they are:
- You (seem to) move around kernel_lost_context. For a feature that's
currently only supported for hw that was never supported with ums, this
is either fishy or needs a big comment.
- In i915_gem_flush you simply flush both ringbuffers. Now either these
two have independent caches (and only one needs to be flushed, gem is
not supposed to do unnecessary flushes) or they are coherent and only
one flush is required, period. Current code looks like it tries to paper
over some coherency issues (and I've learned to loathe these buggers).
Related to this: The active list gets split up between the to
ringbuffers. The flushing list is still global. This looks inconsistent.
- I haven't found out how synchronization between the bsd and the render
ringbuffer is handled. And if the answer is "userspace takes care" I'm
not gonna like it.
With this off the table, I still have to do some venting ;) The idea
behind splitting patches up is:
- to make life easier for reviewers. IHMO you've failed in that regard.
- to make bisecting regressions easy. Given that I've found gimmicks like
tatus_page_dmah->vaddr;
- memset(dev_priv->render_ring.status_page.page_addr
+ memset(dev_priv->render_ring.status_page.page_addr,
0, PAGE_SIZE);
}
in your patches (note the re-added semicolon at the end) your patches
definitely fail at that - they won't even compile in between. And it
looks like other people have problems simply applying the patches, too.
So please take a tad bit more care when prepping patch series. Otherwise
the hours (including thinking time) I've just put into this review feel
kinda wasted.
For a _really_ nice patch series that was a joy to read, look no further
than at Zhenyu Wang's recent connector rework (the patch series that got
merged, _not_ the first submission - but that can serve as comparison).
Oh, one last thing: Given that libIntelXvMC seems to support vld (for
mpeg2), why exactly do we need this?
Yours, Daniel
--
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/4] introduce intel_ring_buffer structure
2010-05-14 1:39 ` Zou, Nanhai
@ 2010-05-14 9:51 ` Daniel Vetter
2010-05-14 15:03 ` Daniel Vetter
2010-05-14 17:43 ` Owain Ainsworth
1 sibling, 1 reply; 24+ messages in thread
From: Daniel Vetter @ 2010-05-14 9:51 UTC (permalink / raw)
To: Zou, Nanhai; +Cc: Intel GFX, Anholt, Eric
Hi,
On Fri, May 14, 2010 at 09:39:22AM +0800, Zou, Nanhai wrote:
> Thanks for reviewing the patch.
>
> To clarify,
> This patch is used for H.264/VC1 decoding.
> Abstract ring buffer is also needed for our later generation GPUs,
> Because there are more types of ring buffer to come.
>
> H.264/VC1 HW decoding is a bigger requirement than mepg2.
> XvMC VLD can only support mpeg2. It is rendering in server context
> and lack of post processing features.
> We are not going to support it later, our later media work will be
> based on vaapi.
Ok, let me restate my question: Is there a strict hw requirement to use a
second ring buffer? If not, what does this gain us?
I'm asking because:
- the XvMC implementation can use the media engine without a second ring
buffer. Yep, I know that XvMC is only mpeg2, but that doesn't change the
fact that the current kernel interface seems to be enough. An no,
current XvMC doesn't issue batchbuffers in the Xorg context but in the
client's process (like direct gl rendering).
- I've strolled through the ironlake docs and I didn't notice anything
saying that bsd for mpeg4/vc1 needs a special ring buffer. Please
correct me if I'm wrong.
- Even the old i815 laying around here has multiple ringbuffers with
arbitration between them. The i915 drm module never used them. The
added complexity simply doesn't pay.
So please explain the technical reasons we need this rather complex beast
of code in the kernel?
> Synchronize between BSD and render ring will be done by VAAPI client.
As I've said, I'm not gonna like this answer. I still don't. gem is
supposed to hide the asynchronous execution nature of gpus. It would be
dead easy to add the required i915_gem_object_wait_for_rendering when
switching ring buffers. You didn't do it and you don't deliver any
explanation for not doing so. In other words, your code looks deliberately
broken.
> We have run Tons of video validation upon this patch.
>
> Our QA will do 1-2 days of regression test on different chips each time I rebase and sent out the patch
Frankly, I don't care what your qa says about your patches. I measure code
quality on the following levels:
1) Is it readable by mere humans? This also includes whether someone could
make sense of it a few months down the road with git blame & friends.
IMHO your patches fail at that for the outlined reasons.
2) Is it bisectable and nicely split up for regression testing? Your
patches fail this, too.
3) Does it actually run on real hw?
Without 1) and 2) fulfilled, 3) (i.e. your qa work) doesn't matter,
because these changes are not really supportable going forward.
> Let's get simple code works first then we can optimize it.
You probably noticed by now that I'm slightly pissed, so I apologize
upfront for the harsh tone. Let me state my opinion in plain English:
Your patch series as-is is crap.
And I think the issues I've raised are not just "optimizations", but
rather fundamental problems. So either you ignore me and I'll cease to
review your patches. Or you start to fix your stuff and/or deliver decent
rebuttals explaining why I'm wrong. And If you resend patches, please
explain in the changelogs what you've changed since last submission (and
why) and what you didn't change (and why).
> Thanks
> Zou Nan hai
Yours, Daniel
--
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/4] introduce intel_ring_buffer structure
2010-05-14 9:51 ` Daniel Vetter
@ 2010-05-14 15:03 ` Daniel Vetter
2010-05-17 1:59 ` Zou, Nanhai
0 siblings, 1 reply; 24+ messages in thread
From: Daniel Vetter @ 2010-05-14 15:03 UTC (permalink / raw)
To: Zou, Nanhai; +Cc: Intel GFX, Anholt, Eric
Hi all,
On Fri, May 14, 2010 at 11:51:14AM +0200, Daniel Vetter wrote:
> So please explain the technical reasons we need this rather complex beast
> of code in the kernel?
Ok, I owe you my apologies (and a beer). I've read through the HD docs and
it looks like bsd decoding for avc/vc1 can't be done without the bsd
command stream. What a pain.
I'm currently crawling through the docs. It looks like we also need a new
gpu domain (something like I915_GEM_DOMAIN_BSD) in addition to a new
execution domain to properly support flushing and coherency. This way
round my concerns about userspace synchronization and the flush-both-rings
semantics would be void, too.
Yours, Daniel
--
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/4] introduce intel_ring_buffer structure
2010-05-14 1:39 ` Zou, Nanhai
2010-05-14 9:51 ` Daniel Vetter
@ 2010-05-14 17:43 ` Owain Ainsworth
2010-05-17 1:43 ` Zou, Nanhai
1 sibling, 1 reply; 24+ messages in thread
From: Owain Ainsworth @ 2010-05-14 17:43 UTC (permalink / raw)
To: Zou, Nanhai; +Cc: Intel GFX, Anholt, Eric
On Fri, May 14, 2010 at 09:39:22AM +0800, Zou, Nanhai wrote:
> Hi,
>
> Thanks for reviewing the patch.
>
> To clarify,
> This patch is used for H.264/VC1 decoding.
> Abstract ring buffer is also needed for our later generation GPUs,
> Because there are more types of ring buffer to come.
>
> H.264/VC1 HW decoding is a bigger requirement than mepg2.
> XvMC VLD can only support mpeg2. It is rendering in server context
> and lack of post processing features.
> We are not going to support it later, our later media work will be
> based on vaapi.
The need for the multiple rings is ring by me. I've checked the ironlake
docs and I see how this works.
>
> Synchronize between BSD and render ring will be done by VAAPI client.
On the other hand, I have a problem with this particular situation.
Without a description of exactly how this works I can not be entirely
sure, but this sounds very unsafe to me.
It looks almost like this is going the way of the DRI1 lock, where
userland is in control of mutual exclusion. I'm glad those days are gone
and I do not wish to go back there.
Look at the rework of xvmc, see how much cleaner it is when it uses GEM
to arbitrate and control caches instead of trying to reinvent it badly
in userland.
Also, (this again is just speculation since I can't see code for
userland), doing this sync in userland means *unprivileged clients*,
i.e. anyone in userland who can speak to the xserver, get the option to
lock the gpu hard trivially. I know our protection against that isn't so
good right now (I am mulling over ideas for how to fix this better), but
openly depending on behaviour that allows this strikes me as wrong.
So, Nanhai, could you please explain how the synchronisation works in
VAAPI? I'll have more comments when I can see how this fits and I see if
it is in fact safe. As usual, seeing an interface without the user means
you only get half the picture and makes review half useless at best.
> Let's get simple code works first then we can optimize it.
Let's make sure the interface is right before we event think about
optimising it.
Cheers,
-0-
--
People will do tomorrow what they did today because that is what they
did yesterday.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/4] introduce intel_ring_buffer structure
2010-05-14 17:43 ` Owain Ainsworth
@ 2010-05-17 1:43 ` Zou, Nanhai
2010-05-17 17:42 ` Daniel Vetter
0 siblings, 1 reply; 24+ messages in thread
From: Zou, Nanhai @ 2010-05-17 1:43 UTC (permalink / raw)
To: Owain Ainsworth; +Cc: Intel GFX, Anholt, Eric
>>-----Original Message-----
>>From: Owain Ainsworth [mailto:zerooa@googlemail.com]
>>Sent: 2010年5月15日 1:44
>>To: Zou, Nanhai
>>Cc: Daniel Vetter; Intel GFX; Anholt, Eric
>>Subject: Re: [Intel-gfx] [PATCH 1/4] introduce intel_ring_buffer structure
>>
>>On Fri, May 14, 2010 at 09:39:22AM +0800, Zou, Nanhai wrote:
>>> Hi,
>>>
>>> Thanks for reviewing the patch.
>>>
>>> To clarify,
>>> This patch is used for H.264/VC1 decoding.
>>> Abstract ring buffer is also needed for our later generation GPUs,
>>> Because there are more types of ring buffer to come.
>>>
>>> H.264/VC1 HW decoding is a bigger requirement than mepg2.
>>> XvMC VLD can only support mpeg2. It is rendering in server context
>>> and lack of post processing features.
>>> We are not going to support it later, our later media work will be
>>> based on vaapi.
>>
>>The need for the multiple rings is ring by me. I've checked the ironlake
>>docs and I see how this works.
>>
>>>
>>> Synchronize between BSD and render ring will be done by VAAPI client.
>>
>>On the other hand, I have a problem with this particular situation.
>>Without a description of exactly how this works I can not be entirely
>>sure, but this sounds very unsafe to me.
>>
>>It looks almost like this is going the way of the DRI1 lock, where
>>userland is in control of mutual exclusion. I'm glad those days are gone
>>and I do not wish to go back there.
>>
>>Look at the rework of xvmc, see how much cleaner it is when it uses GEM
>>to arbitrate and control caches instead of trying to reinvent it badly
>>in userland.
>>
>>Also, (this again is just speculation since I can't see code for
>>userland), doing this sync in userland means *unprivileged clients*,
>>i.e. anyone in userland who can speak to the xserver, get the option to
>>lock the gpu hard trivially. I know our protection against that isn't so
>>good right now (I am mulling over ideas for how to fix this better), but
>>openly depending on behaviour that allows this strikes me as wrong.
>>
>>So, Nanhai, could you please explain how the synchronisation works in
>>VAAPI? I'll have more comments when I can see how this fits and I see if
>>it is in fact safe. As usual, seeing an interface without the user means
>>you only get half the picture and makes review half useless at best.
>>
>>> Let's get simple code works first then we can optimize it.
>>
>>Let's make sure the interface is right before we event think about
>>optimising it.
>>
>>Cheers,
>>
Hi,
You may check how user space works in VAAPI code, if you have an Ironlake system.
We have install guide at http://intellinuxgraphics.org/h264.html.
We have some customer requests for H.264 decoding, so we point to an early kernel and a similar patch on that page.
Actually no additional synchronize was add to user space, after BSD decoding is done, we map the BSD output for media pipe input command stream, at this point
kernel will wait BSD decoding done before it begin media pipe.
so unprivileged client can not damage GPU with this.
Though this is not the most efficient way to do the work,
One of our optimize plan is to have double ring buffer for H.264 decoding later,
e.g. while BSD is decoding to one buffer, media pipe may consume another buffer,
So media and BSD ring can work in parallel. Once we begin to do that, we may need to design a better synchronize method.
Thanks
Zou Nan hai
>>-0-
>>--
>>People will do tomorrow what they did today because that is what they
>>did yesterday.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/4] introduce intel_ring_buffer structure
2010-05-14 15:03 ` Daniel Vetter
@ 2010-05-17 1:59 ` Zou, Nanhai
2010-05-17 17:52 ` Daniel Vetter
0 siblings, 1 reply; 24+ messages in thread
From: Zou, Nanhai @ 2010-05-17 1:59 UTC (permalink / raw)
To: Daniel Vetter; +Cc: Intel GFX, Anholt, Eric
>>-----Original Message-----
>>From: Daniel Vetter [mailto:daniel@ffwll.ch]
>>Sent: 2010年5月14日 23:03
>>To: Zou, Nanhai
>>Cc: Daniel Vetter; Anholt, Eric; Intel GFX
>>Subject: Re: [Intel-gfx] [PATCH 1/4] introduce intel_ring_buffer structure
>>
>>Hi all,
>>
>>On Fri, May 14, 2010 at 11:51:14AM +0200, Daniel Vetter wrote:
>>> So please explain the technical reasons we need this rather complex beast
>>> of code in the kernel?
>>
>>Ok, I owe you my apologies (and a beer). I've read through the HD docs and
>>it looks like bsd decoding for avc/vc1 can't be done without the bsd
>>command stream. What a pain.
>>
Never mind,
H.264 HW decoding is a huge amount of work,
I can image much more works needed later.
Gem may need some more changes if we continue to improve H.264 decoding work.
e.g currently we found H.264 decoding used too much GPU memory cause
2 HD H.264 video decoding slow, we are tring optimize gem GPU memory usage.
Even some works we may do with current ring buffer,
We'd better split it to maximum utilize GPU power.
E.g. on Gen6, we have blitter ring buffer. Though blitter can be done
in render ring buffer, implement and using blitter may improve throughput.
>>I'm currently crawling through the docs. It looks like we also need a new
>>gpu domain (something like I915_GEM_DOMAIN_BSD) in addition to a new
>>execution domain to properly support flushing and coherency. This way
>>round my concerns about userspace synchronization and the flush-both-rings
>>semantics would be void, too.
>>
Actually we have considered adding BSD_DOMAIN when we first design the patch,
Synchronize BSD ring will done at mapping time of media ring buffer.
Since BSD ring will always write, media ring will always read.
No multiple client need to refer to one ring buffer.
We decided to keep this simple at the beginning till we need to do some additional optimize on that.
We almost do not flush both rings in hot path. If you check the code,
Flush both ring will only happen at slow path, e.g gpu_idle.
Thanks
Zou Nan hai
>>Yours, Daniel
>>--
>>Daniel Vetter
>>Mail: daniel@ffwll.ch
>>Mobile: +41 (0)79 365 57 48
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/4] introduce intel_ring_buffer structure
2010-05-17 1:43 ` Zou, Nanhai
@ 2010-05-17 17:42 ` Daniel Vetter
2010-05-18 2:20 ` Zou, Nanhai
0 siblings, 1 reply; 24+ messages in thread
From: Daniel Vetter @ 2010-05-17 17:42 UTC (permalink / raw)
To: Zou, Nanhai; +Cc: Anholt, Eric, Intel GFX
Hi
On Mon, May 17, 2010 at 09:43:02AM +0800, Zou, Nanhai wrote:
> You may check how user space works in VAAPI code, if you have an Ironlake system.
> We have install guide at http://intellinuxgraphics.org/h264.html.
> We have some customer requests for H.264 decoding, so we point to an
> early kernel and a similar patch on that page.
I'll try this out. But likely takes a while till I get around - there's
enough other stuff going on to keep me busy.
> Actually no additional synchronize was add to user space, after BSD
> decoding is done, we map the BSD output for media pipe input command
> stream, at this point kernel will wait BSD decoding done before it begin
> media pipe.
>
> so unprivileged client can not damage GPU with this.
>
> Though this is not the most efficient way to do the work,
>
> One of our optimize plan is to have double ring buffer for H.264
> decoding later, e.g. while BSD is decoding to one buffer, media pipe may
> consume another buffer,
>
> So media and BSD ring can work in parallel. Once we begin to do that, we
> may need to design a better synchronize method.
Well, that's exactly the problem. You simply can't optimize a kernel
interface once it's in use. And if you try to, your users will get the
pitchforks and scream bloody murder trying to get you ;) So we need to get
these patches right (at least the semantics of the interface) beforehand.
> Thanks
> Zou Nan hai
Yours, Daniel
--
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/4] introduce intel_ring_buffer structure
2010-05-17 1:59 ` Zou, Nanhai
@ 2010-05-17 17:52 ` Daniel Vetter
0 siblings, 0 replies; 24+ messages in thread
From: Daniel Vetter @ 2010-05-17 17:52 UTC (permalink / raw)
To: Zou, Nanhai; +Cc: Intel GFX, Anholt, Eric
On Mon, May 17, 2010 at 09:59:22AM +0800, Zou, Nanhai wrote:
> H.264 HW decoding is a huge amount of work,
> I can image much more works needed later.
> Gem may need some more changes if we continue to improve H.264 decoding work.
> e.g currently we found H.264 decoding used too much GPU memory cause
> 2 HD H.264 video decoding slow, we are tring optimize gem GPU memory usage.
>
> Even some works we may do with current ring buffer,
> We'd better split it to maximum utilize GPU power.
You might want to try out the fair-lru-eviction patch by Chris Wilson.
When you're pushing gtt usage to the limit, this should behave much better
than the current code.
> E.g. on Gen6, we have blitter ring buffer. Though blitter can be done
> in render ring buffer, implement and using blitter may improve throughput.
>
> >>I'm currently crawling through the docs. It looks like we also need a new
> >>gpu domain (something like I915_GEM_DOMAIN_BSD) in addition to a new
> >>execution domain to properly support flushing and coherency. This way
> >>round my concerns about userspace synchronization and the flush-both-rings
> >>semantics would be void, too.
> >>
>
> Actually we have considered adding BSD_DOMAIN when we first design the patch,
> Synchronize BSD ring will done at mapping time of media ring buffer.
> Since BSD ring will always write, media ring will always read.
> No multiple client need to refer to one ring buffer.
Well, the bsd(write) -> render(read) is exactly the kind of cache
coherency transition gem is supposed to handle. Differentiating between
these two by the ring the object is sitting on looks hackish to me. We're
not really short on bits for domains after all.
> We decided to keep this simple at the beginning till we need to do some additional optimize on that.
>
> We almost do not flush both rings in hot path. If you check the code,
> Flush both ring will only happen at slow path, e.g gpu_idle.
Add it to the list of things I've got wrong while trying to read your
patches ;)
Yours, Daniel
--
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/4] introduce intel_ring_buffer structure
2010-05-17 17:42 ` Daniel Vetter
@ 2010-05-18 2:20 ` Zou, Nanhai
2010-05-18 16:19 ` Simon Farnsworth
0 siblings, 1 reply; 24+ messages in thread
From: Zou, Nanhai @ 2010-05-18 2:20 UTC (permalink / raw)
To: Daniel Vetter; +Cc: Anholt, Eric, Intel GFX
>>-----Original Message-----
>>From: Daniel Vetter [mailto:daniel@ffwll.ch]
>>Sent: 2010年5月18日 1:43
>>To: Zou, Nanhai
>>Cc: Owain Ainsworth; Daniel Vetter; Intel GFX; Anholt, Eric
>>Subject: Re: [Intel-gfx] [PATCH 1/4] introduce intel_ring_buffer structure
>>
>>Hi
>>
>>On Mon, May 17, 2010 at 09:43:02AM +0800, Zou, Nanhai wrote:
>>> You may check how user space works in VAAPI code, if you have an Ironlake
>>system.
>>> We have install guide at http://intellinuxgraphics.org/h264.html.
>>> We have some customer requests for H.264 decoding, so we point to an
>>> early kernel and a similar patch on that page.
>>
>>I'll try this out. But likely takes a while till I get around - there's
>>enough other stuff going on to keep me busy.
>>
>>> Actually no additional synchronize was add to user space, after BSD
>>> decoding is done, we map the BSD output for media pipe input command
>>> stream, at this point kernel will wait BSD decoding done before it begin
>>> media pipe.
>>>
>>> so unprivileged client can not damage GPU with this.
>>>
>>> Though this is not the most efficient way to do the work,
>>>
>>> One of our optimize plan is to have double ring buffer for H.264
>>> decoding later, e.g. while BSD is decoding to one buffer, media pipe may
>>> consume another buffer,
>>>
>>> So media and BSD ring can work in parallel. Once we begin to do that, we
>>> may need to design a better synchronize method.
>>
>>Well, that's exactly the problem. You simply can't optimize a kernel
>>interface once it's in use. And if you try to, your users will get the
>>pitchforks and scream bloody murder trying to get you ;) So we need to get
>>these patches right (at least the semantics of the interface) beforehand.
>>
>>> Thanks
>>> Zou Nan hai
>>
Hi,
Since VAAPI will be the only client for this multi ring buffer for a period of time.
We may not have so much pain to optimize both kernel and user space.
This approach touches little user space interface, only 1 new flag is added to IOCTL.
We have new kinds of ring buffer to come in SandyBridge and later chips. Since we are still enabling SandyBridge. I can not for cast how those rings would be synchronized to each other.
We may use minimum API to work first.
>>Yours, Daniel
>>--
>>Daniel Vetter
>>Mail: daniel@ffwll.ch
>>Mobile: +41 (0)79 365 57 48
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/4] introduce intel_ring_buffer structure
2010-05-18 2:20 ` Zou, Nanhai
@ 2010-05-18 16:19 ` Simon Farnsworth
2010-05-19 1:09 ` Zou, Nanhai
0 siblings, 1 reply; 24+ messages in thread
From: Simon Farnsworth @ 2010-05-18 16:19 UTC (permalink / raw)
To: intel-gfx; +Cc: Anholt, Eric
On Tuesday 18 May 2010, "Zou, Nanhai" <nanhai.zou@intel.com> wrote:
> >>-----Original Message-----
> >>From: Daniel Vetter [mailto:daniel@ffwll.ch]
> >>Sent: 2010年5月18日 1:43
> >>Well, that's exactly the problem. You simply can't optimize a kernel
> >>interface once it's in use. And if you try to, your users will get the
> >>pitchforks and scream bloody murder trying to get you ;) So we need to
> >>get these patches right (at least the semantics of the interface)
> >>beforehand.
> >>
> Hi,
> Since VAAPI will be the only client for this multi ring buffer for a period
> of time. We may not have so much pain to optimize both kernel and user
> space. This approach touches little user space interface, only 1 new flag
> is added to IOCTL. We have new kinds of ring buffer to come in
> SandyBridge and later chips. Since we are still enabling SandyBridge. I
> can not for cast how those rings would be synchronized to each other. We
> may use minimum API to work first.
>
Be aware that you cannot expect users to bump both their userspace VAAPI
library and their kernel at the same time; there are good reasons for this.
If you do tie the VAAPI library version and the kernel version too tightly
together, you get into a position where users get upset - if I bump my kernel
from 2.6.32 to 2.6.36 to get support for a new WiFi chipset and a new TV
capture card, I'm likely to get quite upset if I then have to also respin my
userspace.
There are also issues around bisection as a tool for isolating regressions -
if I have to carefully bump userspace and kernel in lock-step, it's very
difficult for me to bisect down until I find the changeset that broke things. As
most of the bugs you'll get reported from users like me won't reproduce in
your lab, making it harder for me to give you helpful information is not in
your interests.
In conclusion, as Daniel's been saying, you must get the kernel interface
mostly right first time. If it's too slow, a new interface to improve
performance isn't hard to add in parallel to the old interface; if the old
interface is a DoS vector or worse, you're going to get stuck in a very bad
place.
--
Simon Farnsworth
Software Engineer
ONELAN Limited
http://www.onelan.com/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/4] introduce intel_ring_buffer structure
2010-05-18 16:19 ` Simon Farnsworth
@ 2010-05-19 1:09 ` Zou, Nanhai
2010-05-19 9:00 ` Simon Farnsworth
0 siblings, 1 reply; 24+ messages in thread
From: Zou, Nanhai @ 2010-05-19 1:09 UTC (permalink / raw)
To: Simon Farnsworth, intel-gfx@lists.freedesktop.org; +Cc: Anholt, Eric
>>-----Original Message-----
>>From: Simon Farnsworth [mailto:simon.farnsworth@onelan.com]
>>Sent: 2010年5月19日 0:20
>>To: intel-gfx@lists.freedesktop.org
>>Cc: Zou, Nanhai; Daniel Vetter; Anholt, Eric
>>Subject: Re: [Intel-gfx] [PATCH 1/4] introduce intel_ring_buffer structure
>>
>>On Tuesday 18 May 2010, "Zou, Nanhai" <nanhai.zou@intel.com> wrote:
>>> >>-----Original Message-----
>>> >>From: Daniel Vetter [mailto:daniel@ffwll.ch]
>>> >>Sent: 2010年5月18日 1:43
>>> >>Well, that's exactly the problem. You simply can't optimize a kernel
>>> >>interface once it's in use. And if you try to, your users will get the
>>> >>pitchforks and scream bloody murder trying to get you ;) So we need to
>>> >>get these patches right (at least the semantics of the interface)
>>> >>beforehand.
>>> >>
>>> Hi,
>>> Since VAAPI will be the only client for this multi ring buffer for a period
>>> of time. We may not have so much pain to optimize both kernel and user
>>> space. This approach touches little user space interface, only 1 new flag
>>> is added to IOCTL. We have new kinds of ring buffer to come in
>>> SandyBridge and later chips. Since we are still enabling SandyBridge. I
>>> can not for cast how those rings would be synchronized to each other. We
>>> may use minimum API to work first.
>>>
>>Be aware that you cannot expect users to bump both their userspace VAAPI
>>library and their kernel at the same time; there are good reasons for this.
>>
>>If you do tie the VAAPI library version and the kernel version too tightly
>>together, you get into a position where users get upset - if I bump my kernel
>>from 2.6.32 to 2.6.36 to get support for a new WiFi chipset and a new TV
>>capture card, I'm likely to get quite upset if I then have to also respin my
>>userspace.
>>
>>There are also issues around bisection as a tool for isolating regressions -
>>if I have to carefully bump userspace and kernel in lock-step, it's very
>>difficult for me to bisect down until I find the changeset that broke things.
>>As
>>most of the bugs you'll get reported from users like me won't reproduce in
>>your lab, making it harder for me to give you helpful information is not in
>>your interests.
>>
>>In conclusion, as Daniel's been saying, you must get the kernel interface
>>mostly right first time. If it's too slow, a new interface to improve
>>performance isn't hard to add in parallel to the old interface; if the old
>>interface is a DoS vector or worse, you're going to get stuck in a very bad
>>place.
>>--
Hi,
Currently we do not find any regression or slowness. We have been testing full HD video test along with regression tests for more than 1 month.
The only slowness we find now is with playing 2 1080p H.264 video. That was caused by too much GPU memory usage which is not related to the interface.
Thanks
Zou Nan hai
>>Simon Farnsworth
>>Software Engineer
>>ONELAN Limited
>>http://www.onelan.com/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/4] introduce intel_ring_buffer structure
2010-05-19 1:09 ` Zou, Nanhai
@ 2010-05-19 9:00 ` Simon Farnsworth
2010-05-19 16:54 ` Eric Anholt
0 siblings, 1 reply; 24+ messages in thread
From: Simon Farnsworth @ 2010-05-19 9:00 UTC (permalink / raw)
To: Zou, Nanhai; +Cc: intel-gfx@lists.freedesktop.org, Anholt, Eric
On Wednesday 19 May 2010, "Zou, Nanhai" <nanhai.zou@intel.com> wrote:
> Currently we do not find any regression or slowness. We have been testing
> full HD video test along with regression tests for more than 1 month. The
> only slowness we find now is with playing 2 1080p H.264 video. That was
> caused by too much GPU memory usage which is not related to the interface.
>
And are you utterly confident that if anyone finds a way to use your new ioctl
to (e.g.) hang the GPU, stall rendering indefinitely waiting for media engine
events that will never happen, or perhaps bypass system security, you can fix
it without changing the ioctl interface to userspace? If not, you have a
problem, and need to redesign the ioctl.
Bear in mind that I don't have to use VAAPI to call your ioctl; I can write
evil code that calls it directly. On the other hand, you can't remove the
ioctl later - users will want to use older VAAPI versions with new kernels, so
that they can upgrade without too much fear of regression.
--
Simon Farnsworth
Software Engineer
ONELAN Limited
http://www.onelan.com/
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/4] introduce intel_ring_buffer structure
2010-05-19 9:00 ` Simon Farnsworth
@ 2010-05-19 16:54 ` Eric Anholt
2010-05-19 17:33 ` Simon Farnsworth
0 siblings, 1 reply; 24+ messages in thread
From: Eric Anholt @ 2010-05-19 16:54 UTC (permalink / raw)
To: Simon Farnsworth, Zou, Nanhai; +Cc: intel-gfx@lists.freedesktop.org
[-- Attachment #1.1: Type: text/plain, Size: 2144 bytes --]
On Wed, 19 May 2010 10:00:10 +0100, Simon Farnsworth <simon.farnsworth@onelan.com> wrote:
> On Wednesday 19 May 2010, "Zou, Nanhai" <nanhai.zou@intel.com> wrote:
> > Currently we do not find any regression or slowness. We have been testing
> > full HD video test along with regression tests for more than 1 month. The
> > only slowness we find now is with playing 2 1080p H.264 video. That was
> > caused by too much GPU memory usage which is not related to the interface.
> >
> And are you utterly confident that if anyone finds a way to use your new ioctl
> to (e.g.) hang the GPU, stall rendering indefinitely waiting for media engine
> events that will never happen, or perhaps bypass system security, you can fix
> it without changing the ioctl interface to userspace? If not, you have a
> problem, and need to redesign the ioctl.
>
> Bear in mind that I don't have to use VAAPI to call your ioctl; I can write
> evil code that calls it directly. On the other hand, you can't remove the
> ioctl later - users will want to use older VAAPI versions with new kernels, so
> that they can upgrade without too much fear of regression.
OK, now you're going over the top. The GPU can't schedule[1], is turing
complete, and you hand it programs. You can hang it with or without his
interface. If you don't want to be able to do that, then don't let
non-root use the DRI.
Our goal in designing kernel interfaces is to make sure that we get the
best behavior we can. After clarifying the cache behavior of the BSD
rendering (it all goes into the render cache, so if we're tracking which
ring is producing the results, having objects only on one ring, and then
tracking them in the one flushing list once they fall off a ring's
active list), we can safely handle normal operation with a client dying
in the middle. Also, userland is not having to manage caching
differently from how userland already does in GEM. That was the part
that was unclear before, so I think we're good to go.
[1] OK, there's support for scheduling of operations between rings at
batch or packet boundaries. Not helping.
[-- Attachment #1.2: Type: application/pgp-signature, Size: 197 bytes --]
[-- Attachment #2: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/4] introduce intel_ring_buffer structure
2010-05-19 16:54 ` Eric Anholt
@ 2010-05-19 17:33 ` Simon Farnsworth
0 siblings, 0 replies; 24+ messages in thread
From: Simon Farnsworth @ 2010-05-19 17:33 UTC (permalink / raw)
To: Eric Anholt; +Cc: intel-gfx@lists.freedesktop.org
On Wednesday 19 May 2010, Eric Anholt <eric.anholt@intel.com> wrote:
> On Wed, 19 May 2010 10:00:10 +0100, Simon Farnsworth
<simon.farnsworth@onelan.com> wrote:
> > Bear in mind that I don't have to use VAAPI to call your ioctl; I can
> > write evil code that calls it directly. On the other hand, you can't
> > remove the ioctl later - users will want to use older VAAPI versions
> > with new kernels, so that they can upgrade without too much fear of
> > regression.
>
> OK, now you're going over the top. The GPU can't schedule[1], is turing
> complete, and you hand it programs. You can hang it with or without his
> interface. If you don't want to be able to do that, then don't let
> non-root use the DRI.
>
Fair enough, and my apologies for going overboard - my intended point is that
just because VAAPI is the only planned user of this ioctl doesn't mean that
it's OK to get the userspace/kernel interface wrong.
--
Simon Farnsworth
Software Engineer
ONELAN Limited
http://www.onelan.com/
^ permalink raw reply [flat|nested] 24+ messages in thread
end of thread, other threads:[~2010-05-19 17:33 UTC | newest]
Thread overview: 24+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-05-06 1:20 [PATCH 1/4] introduce intel_ring_buffer structure Zou Nan hai
2010-05-06 1:20 ` [PATCH 2/4] convert render engine to use intel_ring_buffer Zou Nan hai
2010-05-06 1:20 ` [PATCH 3/4] add BSD ring buffer support Zou Nan hai
2010-05-06 1:20 ` [PATCH 4/4] adapt intel_ring_buffer into gem Zou Nan hai
2010-05-07 21:34 ` [PATCH 1/4] introduce intel_ring_buffer structure Eric Anholt
2010-05-08 18:15 ` Thomas Bächler
2010-05-11 22:24 ` Daniel Vetter
2010-05-14 1:39 ` Zou, Nanhai
2010-05-14 9:51 ` Daniel Vetter
2010-05-14 15:03 ` Daniel Vetter
2010-05-17 1:59 ` Zou, Nanhai
2010-05-17 17:52 ` Daniel Vetter
2010-05-14 17:43 ` Owain Ainsworth
2010-05-17 1:43 ` Zou, Nanhai
2010-05-17 17:42 ` Daniel Vetter
2010-05-18 2:20 ` Zou, Nanhai
2010-05-18 16:19 ` Simon Farnsworth
2010-05-19 1:09 ` Zou, Nanhai
2010-05-19 9:00 ` Simon Farnsworth
2010-05-19 16:54 ` Eric Anholt
2010-05-19 17:33 ` Simon Farnsworth
-- strict thread matches above, loose matches on Subject: below --
2010-05-05 3:17 Zou Nan hai
2010-05-05 18:23 ` Daniel Vetter
2010-05-06 2:25 ` Zou, Nanhai
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.