* [PATCH 1/2] split render engine batch buffer and BLT engine
@ 2010-10-26 7:33 Zou Nan hai
2010-10-26 7:33 ` [PATCH 2/2] use BLT command to accelerate uxa on gen6 Zou Nan hai
2010-10-26 8:17 ` [PATCH 1/2] split render engine batch buffer and BLT engine Chris Wilson
0 siblings, 2 replies; 9+ messages in thread
From: Zou Nan hai @ 2010-10-26 7:33 UTC (permalink / raw)
To: intel-gfx
intel: on gen6, BLT commands stay in a separate BLT ring
buffer. Split render engine batch and BLT engine batch
on gen6.
Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
---
src/i830_3d.c | 2 +-
src/i830_render.c | 16 +++--
src/i915_3d.c | 2 +-
src/i915_3d.h | 6 +-
src/i915_render.c | 23 +++---
src/i915_video.c | 9 ++-
src/i965_render.c | 21 +++---
src/i965_video.c | 10 ++--
src/intel.h | 55 ++++++++-------
src/intel_batchbuffer.c | 171 +++++++++++++++++++++++++++--------------------
src/intel_batchbuffer.h | 128 +++++++++++++++++++----------------
src/intel_display.c | 4 +-
src/intel_dri.c | 3 +-
src/intel_driver.c | 18 +++--
src/intel_uxa.c | 46 +++++++------
15 files changed, 285 insertions(+), 229 deletions(-)
diff --git a/src/i830_3d.c b/src/i830_3d.c
index 1043201..e0cfda7 100644
--- a/src/i830_3d.c
+++ b/src/i830_3d.c
@@ -38,7 +38,7 @@ void I830EmitInvarientState(ScrnInfoPtr scrn)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
- assert(intel->in_batch_atomic);
+ assert(intel->batch[RENDER_BATCH].in_batch_atomic);
OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(0));
OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(1));
diff --git a/src/i830_render.c b/src/i830_render.c
index 52646d3..867883a 100644
--- a/src/i830_render.c
+++ b/src/i830_render.c
@@ -302,7 +302,7 @@ static void i830_texture_setup(PicturePtr picture, PixmapPtr pixmap, int unit)
format = i8xx_get_card_format(intel, picture);
- assert(intel->in_batch_atomic);
+ assert(intel->batch[RENDER_BATCH].in_batch_atomic);
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
LOAD_TEXTURE_MAP(unit) | 4);
@@ -488,7 +488,8 @@ i830_prepare_composite(int op, PicturePtr source_picture,
if (!i830_get_dest_format(dest_picture, &intel->render_dest_format))
return FALSE;
- if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table)))
+ if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table),
+ RENDER_BATCH))
return FALSE;
if (mask) {
@@ -565,7 +566,7 @@ i830_prepare_composite(int op, PicturePtr source_picture,
if(intel_pixmap_is_dirty(source) ||
(mask && intel_pixmap_is_dirty(mask)))
- intel_batch_emit_flush(scrn);
+ intel_batch_emit_flush(scrn, RENDER_BATCH);
intel->needs_render_state_emit = TRUE;
@@ -583,7 +584,7 @@ static void i830_emit_composite_state(ScrnInfoPtr scrn)
IntelEmitInvarientState(scrn);
intel->last_3d = LAST_3D_RENDER;
- assert(intel->in_batch_atomic);
+ assert(intel->batch[RENDER_BATCH].in_batch_atomic);
if (intel_pixmap_tiled(intel->render_dest)) {
tiling_bits = BUF_3D_TILED_SURFACE;
@@ -848,7 +849,8 @@ i830_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
intel_batch_start_atomic(scrn, 58 + /* invarient */
22 + /* setup */
20 + /* 2 * setup_texture */
- 1 + 30 /* verts */ );
+ 1 + 30 /* verts */,
+ RENDER_BATCH);
if (intel->needs_render_state_emit)
i830_emit_composite_state(scrn);
@@ -856,10 +858,10 @@ i830_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
i830_emit_composite_primitive(dest, srcX, srcY, maskX, maskY, dstX,
dstY, w, h);
- intel_batch_end_atomic(scrn);
+ intel_batch_end_atomic(scrn, RENDER_BATCH);
}
-void i830_batch_flush_notify(ScrnInfoPtr scrn)
+void i830_batch_flush_notify(ScrnInfoPtr scrn, int batch_idx)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
diff --git a/src/i915_3d.c b/src/i915_3d.c
index 77db568..a4e386d 100644
--- a/src/i915_3d.c
+++ b/src/i915_3d.c
@@ -38,7 +38,7 @@ void I915EmitInvarientState(ScrnInfoPtr scrn)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
- assert(intel->in_batch_atomic);
+ assert(intel->batch[RENDER_BATCH].in_batch_atomic);
OUT_BATCH(_3DSTATE_AA_CMD |
AA_LINE_ECAAR_WIDTH_ENABLE |
diff --git a/src/i915_3d.h b/src/i915_3d.h
index 04531f3..056c961 100644
--- a/src/i915_3d.h
+++ b/src/i915_3d.h
@@ -608,12 +608,12 @@ enum i915_fs_channel {
#define FS_BEGIN() \
do { \
- _shader_offset = intel->batch_used++; \
+ _shader_offset = intel->batch[RENDER_BATCH].batch_used++; \
} while (0)
#define FS_END() \
do { \
- intel->batch_ptr[_shader_offset] = \
+ intel->batch[RENDER_BATCH].batch_ptr[_shader_offset] = \
_3DSTATE_PIXEL_SHADER_PROGRAM | \
- (intel->batch_used - _shader_offset - 2); \
+ (intel->batch[RENDER_BATCH].batch_used - _shader_offset - 2); \
} while (0);
diff --git a/src/i915_render.c b/src/i915_render.c
index fafdac5..21a0021 100644
--- a/src/i915_render.c
+++ b/src/i915_render.c
@@ -546,7 +546,7 @@ i915_emit_composite_primitive(PixmapPtr dest,
{
ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
intel_screen_private *intel = intel_get_screen_private(scrn);
- Bool is_affine_src, is_affine_mask = TRUE;
+ Bool is_affine_src = FALSE, is_affine_mask = TRUE;
int per_vertex, num_floats;
int tex_unit = 0;
int src_unit = -1, mask_unit = -1;
@@ -780,7 +780,8 @@ i915_prepare_composite(int op, PicturePtr source_picture,
&intel->i915_render_state.dst_format))
return FALSE;
- if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table)))
+ if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table),
+ RENDER_BATCH))
return FALSE;
intel->needs_render_ca_pass = FALSE;
@@ -838,10 +839,10 @@ i915_prepare_composite(int op, PicturePtr source_picture,
/* BUF_INFO is an implicit flush */
if (dest != intel->render_current_dest)
- intel_batch_do_flush(scrn);
+ intel_batch_do_flush(scrn, RENDER_BATCH);
else if((source && intel_pixmap_is_dirty(source)) ||
(mask && intel_pixmap_is_dirty(mask)))
- intel_batch_emit_flush(scrn);
+ intel_batch_emit_flush(scrn, RENDER_BATCH);
intel->needs_render_state_emit = TRUE;
@@ -1007,7 +1008,7 @@ static void i915_emit_composite_setup(ScrnInfoPtr scrn)
tex_count += ! is_solid_src;
tex_count += mask && ! is_solid_mask;
- assert(intel->in_batch_atomic);
+ assert(intel->batch[RENDER_BATCH].in_batch_atomic);
if (tex_count != 0) {
OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
@@ -1043,7 +1044,7 @@ static void i915_emit_composite_setup(ScrnInfoPtr scrn)
if (1 || dest != intel->render_current_dest) {
uint32_t tiling_bits;
- intel_batch_do_flush(scrn);
+ intel_batch_do_flush(scrn, RENDER_BATCH);
if (intel_pixmap_tiled(dest)) {
tiling_bits = BUF_3D_TILED_SURFACE;
@@ -1116,7 +1117,7 @@ i915_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
intel_screen_private *intel = intel_get_screen_private(scrn);
/* 28 + 16 + 10 + 20 + 32 + 16 */
- intel_batch_start_atomic(scrn, 150);
+ intel_batch_start_atomic(scrn, 150, RENDER_BATCH);
if (intel->needs_render_state_emit)
i915_emit_composite_setup(scrn);
@@ -1158,7 +1159,7 @@ i915_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
i915_composite_emit_shader(intel, PictOpOutReverse);
}
- intel->prim_offset = intel->batch_used;
+ intel->prim_offset = intel->batch[RENDER_BATCH].batch_used;
OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL);
OUT_BATCH(intel->vertex_index);
}
@@ -1170,7 +1171,7 @@ i915_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
dstX, dstY,
w, h);
- intel_batch_end_atomic(scrn);
+ intel_batch_end_atomic(scrn, RENDER_BATCH);
}
void
@@ -1179,7 +1180,7 @@ i915_vertex_flush(intel_screen_private *intel)
if (intel->prim_offset == 0)
return;
- intel->batch_ptr[intel->prim_offset] |= intel->vertex_count;
+ intel->batch[RENDER_BATCH].batch_ptr[intel->prim_offset] |= intel->vertex_count;
intel->prim_offset = 0;
if (intel->needs_render_ca_pass) {
@@ -1197,7 +1198,7 @@ i915_vertex_flush(intel_screen_private *intel)
}
void
-i915_batch_flush_notify(ScrnInfoPtr scrn)
+i915_batch_flush_notify(ScrnInfoPtr scrn, int batch_idx)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
diff --git a/src/i915_video.c b/src/i915_video.c
index 861bea6..87016a7 100644
--- a/src/i915_video.c
+++ b/src/i915_video.c
@@ -92,7 +92,7 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn,
#define BYTES_FOR_BOXES(n) ((200 + (n) * 20) * 4)
#define BOXES_IN_BYTES(s) ((((s)/4) - 200) / 20)
-#define BATCH_BYTES(p) ((p)->batch_bo->size - 16)
+#define BATCH_BYTES(p) ((p)->batch[RENDER_BATCH].batch_bo->size - 16)
while (nbox_total) {
nbox_this_time = nbox_total;
@@ -100,7 +100,8 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn,
nbox_this_time = BOXES_IN_BYTES(BATCH_BYTES(intel));
nbox_total -= nbox_this_time;
- intel_batch_start_atomic(scrn, 200 + 20 * nbox_this_time);
+ intel_batch_start_atomic(scrn, 200 + 20 * nbox_this_time,
+ RENDER_BATCH);
IntelEmitInvarientState(scrn);
intel->last_3d = LAST_3D_VIDEO;
@@ -445,7 +446,7 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn,
OUT_BATCH_F((box_y1 - dyo) * src_scale_y);
}
- intel_batch_end_atomic(scrn);
+ intel_batch_end_atomic(scrn, RENDER_BATCH);
}
if (target != pixmap) {
@@ -478,5 +479,5 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn,
target->drawable.pScreen->DestroyPixmap(target);
}
- intel_debug_flush(scrn);
+ intel_debug_flush(scrn, RENDER_BATCH);
}
diff --git a/src/i965_render.c b/src/i965_render.c
index c0c5de4..c9c65fb 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -1128,7 +1128,7 @@ i965_set_picture_surface_state(intel_screen_private *intel,
read_domains = I915_GEM_DOMAIN_SAMPLER;
}
- intel_batch_mark_pixmap_domains(intel, priv, read_domains, write_domain);
+ intel_batch_mark_pixmap_domains(intel, priv, read_domains, write_domain, RENDER_BATCH);
dri_bo_emit_reloc(ss_bo, read_domains, write_domain,
0,
ss_index * sizeof(*ss) +
@@ -1169,7 +1169,8 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn)
intel_batch_mark_pixmap_domains(intel,
intel_get_pixmap_private(dest),
I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER);
+ I915_GEM_DOMAIN_RENDER,
+ RENDER_BATCH);
urb_vs_start = 0;
urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
@@ -1195,7 +1196,7 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn)
*/
ALIGN_BATCH(64);
- assert(intel->in_batch_atomic);
+ assert(intel->batch[RENDER_BATCH].in_batch_atomic);
{
/* Match Mesa driver setup */
OUT_BATCH(MI_FLUSH |
@@ -1473,7 +1474,7 @@ static Bool i965_composite_check_aperture(ScrnInfoPtr scrn)
struct gen4_render_state *render_state = intel->gen4_render_state;
gen4_composite_op *composite_op = &render_state->composite_op;
drm_intel_bo *bo_table[] = {
- intel->batch_bo,
+ intel->batch[RENDER_BATCH].batch_bo,
composite_op->binding_table_bo,
render_state->vertex_buffer_bo,
render_state->vs_state_bo,
@@ -1558,7 +1559,7 @@ i965_prepare_composite(int op, PicturePtr source_picture,
/* Flush any pending writes prior to relocating the textures. */
if (intel_pixmap_is_dirty(source) ||
(mask && intel_pixmap_is_dirty(mask)))
- intel_batch_emit_flush(scrn);
+ intel_batch_emit_flush(scrn, RENDER_BATCH);
/* Set up the surface states. */
@@ -1684,7 +1685,7 @@ i965_prepare_composite(int op, PicturePtr source_picture,
}
if (!i965_composite_check_aperture(scrn)) {
- intel_batch_submit(scrn, FALSE);
+ intel_batch_submit(scrn, FALSE, RENDER_BATCH);
if (!i965_composite_check_aperture(scrn)) {
intel_debug_fallback(scrn,
"Couldn't fit render operation "
@@ -1857,9 +1858,9 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
drm_intel_bo_subdata(vb_bo, render_state->vb_offset * 4, i * 4, vb);
if (!i965_composite_check_aperture(scrn))
- intel_batch_submit(scrn, FALSE);
+ intel_batch_submit(scrn, FALSE, RENDER_BATCH);
- intel_batch_start_atomic(scrn, 200);
+ intel_batch_start_atomic(scrn, 200, RENDER_BATCH);
if (intel->needs_render_state_emit)
i965_emit_composite_state(scrn);
@@ -1891,10 +1892,10 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
render_state->vb_offset += i;
drm_intel_bo_unreference(vb_bo);
- intel_batch_end_atomic(scrn);
+ intel_batch_end_atomic(scrn, RENDER_BATCH);
}
-void i965_batch_flush_notify(ScrnInfoPtr scrn)
+void i965_batch_flush_notify(ScrnInfoPtr scrn, int batch_idx)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
struct gen4_render_state *render_state = intel->gen4_render_state;
diff --git a/src/i965_video.c b/src/i965_video.c
index 4ededde..f3e4082 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -1154,7 +1154,7 @@ I965DisplayVideoTextured(ScrnInfoPtr scrn,
float *vb;
drm_intel_bo *bo_table[] = {
NULL, /* vb_bo */
- intel->batch_bo,
+ intel->batch[RENDER_BATCH].batch_bo,
bind_bo,
intel->video.gen4_sampler_bo,
intel->video.gen4_sip_kernel_bo,
@@ -1199,10 +1199,10 @@ I965DisplayVideoTextured(ScrnInfoPtr scrn,
if (drm_intel_bufmgr_check_aperture_space(bo_table,
ARRAY_SIZE(bo_table))
< 0) {
- intel_batch_submit(scrn, FALSE);
+ intel_batch_submit(scrn, FALSE, RENDER_BATCH);
}
- intel_batch_start_atomic(scrn, 100);
+ intel_batch_start_atomic(scrn, 100, RENDER_BATCH);
i965_emit_video_setup(scrn, bind_bo, n_src_surf);
@@ -1228,7 +1228,7 @@ I965DisplayVideoTextured(ScrnInfoPtr scrn,
OUT_BATCH(0); /* index buffer offset, ignored */
OUT_BATCH(MI_NOOP);
- intel_batch_end_atomic(scrn);
+ intel_batch_end_atomic(scrn, RENDER_BATCH);
drm_intel_bo_unreference(vb_bo);
@@ -1240,7 +1240,7 @@ I965DisplayVideoTextured(ScrnInfoPtr scrn,
/* release reference once we're finished */
drm_intel_bo_unreference(bind_bo);
- intel_debug_flush(scrn);
+ intel_debug_flush(scrn, RENDER_BATCH);
}
void i965_free_video(ScrnInfoPtr scrn)
diff --git a/src/intel.h b/src/intel.h
index 7604eee..c5bdbd5 100644
--- a/src/intel.h
+++ b/src/intel.h
@@ -223,7 +223,7 @@ static inline Bool intel_pixmap_tiled(PixmapPtr pixmap)
}
dri_bo *intel_get_pixmap_bo(PixmapPtr pixmap);
-void intel_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo);
+void intel_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo, int batch_idx);
typedef struct _I830OutputRec I830OutputRec, *I830OutputPtr;
@@ -284,6 +284,26 @@ enum dri_type {
DRI_DRI2
};
+struct batch {
+ uint32_t batch_ptr[4096];
+ /** Byte offset in batch_ptr for the next dword to be emitted. */
+ unsigned int batch_used;
+ /** Position in batch_ptr at the start of the current BEGIN_BATCH */
+ unsigned int batch_emit_start;
+ /** Number of bytes to be emitted in the current BEGIN_BATCH. */
+ uint32_t batch_emitting;
+ dri_bo *batch_bo;
+ dri_bo *last_batch_bo;
+ /** Whether we're in a section of code that can't tolerate flushing */
+ Bool in_batch_atomic;
+ /** Ending batch_used that was verified by intel_start_batch_atomic() */
+ int batch_atomic_limit;
+ struct list batch_pixmaps;
+ struct list flush_pixmaps;
+ struct list in_flight;
+ Bool need_mi_flush;
+};
+
typedef struct intel_screen_private {
ScrnInfoPtr scrn;
unsigned char *MMIOBase;
@@ -304,23 +324,9 @@ typedef struct intel_screen_private {
dri_bufmgr *bufmgr;
- uint32_t batch_ptr[4096];
- /** Byte offset in batch_ptr for the next dword to be emitted. */
- unsigned int batch_used;
- /** Position in batch_ptr at the start of the current BEGIN_BATCH */
- unsigned int batch_emit_start;
- /** Number of bytes to be emitted in the current BEGIN_BATCH. */
- uint32_t batch_emitting;
- dri_bo *batch_bo;
- dri_bo *last_batch_bo;
- /** Whether we're in a section of code that can't tolerate flushing */
- Bool in_batch_atomic;
- /** Ending batch_used that was verified by intel_start_batch_atomic() */
- int batch_atomic_limit;
- struct list batch_pixmaps;
- struct list flush_pixmaps;
- struct list in_flight;
-
+#define RENDER_BATCH 0
+#define BLT_BATCH 1
+ struct batch batch[2];
/* For Xvideo */
Bool use_overlay;
#ifdef INTEL_XVMC
@@ -332,7 +338,6 @@ typedef struct intel_screen_private {
Bool shadow_present;
- Bool need_mi_flush;
Bool tiling;
Bool swapbuffers_wait;
@@ -348,7 +353,7 @@ typedef struct intel_screen_private {
CloseScreenProcPtr CloseScreen;
void (*vertex_flush) (struct intel_screen_private *intel);
- void (*batch_flush_notify) (ScrnInfoPtr scrn);
+ void (*batch_flush_notify) (ScrnInfoPtr scrn, int batch_idx);
uxa_driver_t *uxa_driver;
Bool need_sync;
@@ -554,8 +559,8 @@ Bool i915_prepare_composite(int op, PicturePtr sourcec, PicturePtr mask,
void i915_composite(PixmapPtr dest, int srcX, int srcY,
int maskX, int maskY, int dstX, int dstY, int w, int h);
void i915_vertex_flush(intel_screen_private *intel);
-void i915_batch_flush_notify(ScrnInfoPtr scrn);
-void i830_batch_flush_notify(ScrnInfoPtr scrn);
+void i915_batch_flush_notify(ScrnInfoPtr scrn, int batch_idx);
+void i830_batch_flush_notify(ScrnInfoPtr scrn, int batch_idx);
/* i965_render.c */
unsigned int gen4_render_state_size(ScrnInfoPtr scrn);
void gen4_render_state_init(ScrnInfoPtr scrn);
@@ -570,7 +575,7 @@ Bool i965_prepare_composite(int op, PicturePtr sourcec, PicturePtr mask,
void i965_composite(PixmapPtr dest, int srcX, int srcY,
int maskX, int maskY, int dstX, int dstY, int w, int h);
-void i965_batch_flush_notify(ScrnInfoPtr scrn);
+void i965_batch_flush_notify(ScrnInfoPtr scrn, int batch_idx);
Bool intel_transform_is_affine(PictTransformPtr t);
Bool
@@ -658,7 +663,7 @@ static inline drm_intel_bo *intel_bo_alloc_for_data(ScrnInfoPtr scrn,
#define ALLOW_SHARING 0x00000010
#define DISABLE_REUSE 0x00000020
-void intel_debug_flush(ScrnInfoPtr scrn);
+void intel_debug_flush(ScrnInfoPtr scrn, int batch_idx);
static inline PixmapPtr get_drawable_pixmap(DrawablePtr drawable)
{
@@ -683,7 +688,7 @@ Bool intel_uxa_init(ScreenPtr pScreen);
void intel_uxa_create_screen_resources(ScreenPtr pScreen);
void intel_uxa_block_handler(intel_screen_private *intel);
Bool intel_get_aperture_space(ScrnInfoPtr scrn, drm_intel_bo ** bo_table,
- int num_bos);
+ int num_bos, int batch_idx);
/* intel_shadow.c */
void intel_shadow_blt(intel_screen_private *intel);
diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
index e7ca69d..3b72ba1 100644
--- a/src/intel_batchbuffer.c
+++ b/src/intel_batchbuffer.c
@@ -61,19 +61,22 @@ void intel_next_vertex(intel_screen_private *intel)
intel->vertex_used = 0;
}
-static void intel_next_batch(ScrnInfoPtr scrn)
+static void intel_next_batch(ScrnInfoPtr scrn, int batch_idx)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
+ struct batch *batch = &intel->batch[batch_idx];
+ batch->batch_emit_start = 0;
+ batch->batch_emitting = 0;
/* The 865 has issues with larger-than-page-sized batch buffers. */
if (IS_I865G(intel))
- intel->batch_bo =
+ batch->batch_bo =
dri_bo_alloc(intel->bufmgr, "batch", 4096, 4096);
else
- intel->batch_bo =
+ batch->batch_bo =
dri_bo_alloc(intel->bufmgr, "batch", 4096 * 4, 4096);
- intel->batch_used = 0;
+ batch->batch_used = 0;
/* We don't know when another client has executed, so we have
* to reinitialize our 3D state per batch.
@@ -85,41 +88,36 @@ void intel_batch_init(ScrnInfoPtr scrn)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
- intel->batch_emit_start = 0;
- intel->batch_emitting = 0;
-
- intel_next_batch(scrn);
+ intel_next_batch(scrn, RENDER_BATCH);
+ if (IS_GEN6(intel))
+ intel_next_batch(scrn, BLT_BATCH);
}
-void intel_batch_teardown(ScrnInfoPtr scrn)
+static void batch_teardown(ScrnInfoPtr scrn, int batch_idx)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
+ struct batch *batch = &intel->batch[batch_idx];
- if (intel->batch_bo != NULL) {
- dri_bo_unreference(intel->batch_bo);
- intel->batch_bo = NULL;
- }
-
- if (intel->last_batch_bo != NULL) {
- dri_bo_unreference(intel->last_batch_bo);
- intel->last_batch_bo = NULL;
+ if (batch->batch_bo != NULL) {
+ dri_bo_unreference(batch->batch_bo);
+ batch->batch_bo = NULL;
}
- if (intel->vertex_bo) {
- dri_bo_unreference(intel->vertex_bo);
- intel->vertex_bo = NULL;
+ if (batch->last_batch_bo != NULL) {
+ dri_bo_unreference(batch->last_batch_bo);
+ batch->last_batch_bo = NULL;
}
- while (!list_is_empty(&intel->batch_pixmaps))
- list_del(intel->batch_pixmaps.next);
+ while (!list_is_empty(&batch->batch_pixmaps))
+ list_del(batch->batch_pixmaps.next);
- while (!list_is_empty(&intel->flush_pixmaps))
- list_del(intel->flush_pixmaps.next);
+ while (!list_is_empty(&batch->flush_pixmaps))
+ list_del(batch->flush_pixmaps.next);
- while (!list_is_empty(&intel->in_flight)) {
+ while (!list_is_empty(&batch->in_flight)) {
struct intel_pixmap *entry;
- entry = list_first_entry(&intel->in_flight,
+ entry = list_first_entry(&batch->in_flight,
struct intel_pixmap,
in_flight);
@@ -129,70 +127,95 @@ void intel_batch_teardown(ScrnInfoPtr scrn)
}
}
-void intel_batch_do_flush(ScrnInfoPtr scrn)
+void intel_batch_teardown(ScrnInfoPtr scrn)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
- while (!list_is_empty(&intel->flush_pixmaps))
- list_del(intel->flush_pixmaps.next);
+ if (intel->vertex_bo) {
+ dri_bo_unreference(intel->vertex_bo);
+ intel->vertex_bo = NULL;
+ }
- intel->need_mi_flush = FALSE;
+ batch_teardown(scrn, RENDER_BATCH);
+ if (IS_GEN6(intel))
+ batch_teardown(scrn, BLT_BATCH);
}
-void intel_batch_emit_flush(ScrnInfoPtr scrn)
+void intel_batch_do_flush(ScrnInfoPtr scrn, int batch_idx)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
- int flags;
+ struct batch *batch = &intel->batch[batch_idx];
- assert (!intel->in_batch_atomic);
+ while (!list_is_empty(&batch->flush_pixmaps))
+ list_del(batch->flush_pixmaps.next);
- /* Big hammer, look to the pipelined flushes in future. */
- flags = MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE;
- if (INTEL_INFO(intel)->gen >= 40)
- flags = 0;
+ batch->need_mi_flush = FALSE;
+}
- BEGIN_BATCH(1);
- OUT_BATCH(MI_FLUSH | flags);
- ADVANCE_BATCH();
+void intel_batch_emit_flush(ScrnInfoPtr scrn, int batch_idx)
+{
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+ struct batch *batch = &intel->batch[batch_idx];
+ int flags;
- intel_batch_do_flush(scrn);
+ assert (!batch->in_batch_atomic);
+ if (batch_idx == RENDER_BATCH) {
+ /* Big hammer, look to the pipelined flushes in future. */
+ flags = MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE;
+ if (INTEL_INFO(intel)->gen >= 40)
+ flags = 0;
+
+ BEGIN_BATCH(1);
+ OUT_BATCH(MI_FLUSH | flags);
+ ADVANCE_BATCH();
+
+ intel_batch_do_flush(scrn, batch_idx);
+ } else {
+ }
}
-void intel_batch_submit(ScrnInfoPtr scrn, int flush)
+void intel_batch_submit(ScrnInfoPtr scrn, int flush, int batch_idx)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
int ret;
+ struct batch *batch = &intel->batch[batch_idx];
- assert (!intel->in_batch_atomic);
+ assert (!batch->in_batch_atomic);
- if (intel->vertex_flush)
- intel->vertex_flush(intel);
- intel_end_vertex(intel);
+ if (batch_idx == RENDER_BATCH) {
+ if (intel->vertex_flush)
+ intel->vertex_flush(intel);
+ intel_end_vertex(intel);
+ }
if (flush)
- intel_batch_emit_flush(scrn);
+ intel_batch_emit_flush(scrn, batch_idx);
- if (intel->batch_used == 0)
+ if (batch->batch_used == 0)
return;
/* Mark the end of the batchbuffer. */
OUT_BATCH(MI_BATCH_BUFFER_END);
/* Emit a padding dword if we aren't going to be quad-word aligned. */
- if (intel->batch_used & 1)
+ if (batch->batch_used & 1)
OUT_BATCH(MI_NOOP);
if (DUMP_BATCHBUFFERS) {
FILE *file = fopen(DUMP_BATCHBUFFERS, "a");
if (file) {
- fwrite (intel->batch_ptr, intel->batch_used*4, 1, file);
+ fwrite (batch->batch_ptr, batch->batch_used*4, 1, file);
fclose(file);
}
}
- ret = dri_bo_subdata(intel->batch_bo, 0, intel->batch_used*4, intel->batch_ptr);
- if (ret == 0)
- ret = dri_bo_exec(intel->batch_bo, intel->batch_used*4,
- NULL, 0, 0xffffffff);
+ ret = dri_bo_subdata(batch->batch_bo, 0, batch->batch_used*4, batch->batch_ptr);
+ if (ret == 0) {
+ if (batch_idx == RENDER_BATCH) {
+ ret = dri_bo_exec(batch->batch_bo, batch->batch_used*4,
+ NULL, 0, 0xffffffff);
+ } else {
+ }
+ }
if (ret != 0) {
if (ret == -EIO) {
static int once;
@@ -212,10 +235,10 @@ void intel_batch_submit(ScrnInfoPtr scrn, int flush)
}
}
- while (!list_is_empty(&intel->batch_pixmaps)) {
+ while (!list_is_empty(&batch->batch_pixmaps)) {
struct intel_pixmap *entry;
- entry = list_first_entry(&intel->batch_pixmaps,
+ entry = list_first_entry(&batch->batch_pixmaps,
struct intel_pixmap,
batch);
@@ -224,14 +247,14 @@ void intel_batch_submit(ScrnInfoPtr scrn, int flush)
list_del(&entry->batch);
}
- intel->need_mi_flush |= !list_is_empty(&intel->flush_pixmaps);
- while (!list_is_empty(&intel->flush_pixmaps))
- list_del(intel->flush_pixmaps.next);
+ batch->need_mi_flush |= !list_is_empty(&batch->flush_pixmaps);
+ while (!list_is_empty(&batch->flush_pixmaps))
+ list_del(batch->flush_pixmaps.next);
- while (!list_is_empty(&intel->in_flight)) {
+ while (!list_is_empty(&batch->in_flight)) {
struct intel_pixmap *entry;
- entry = list_first_entry(&intel->in_flight,
+ entry = list_first_entry(&batch->in_flight,
struct intel_pixmap,
in_flight);
@@ -243,38 +266,40 @@ void intel_batch_submit(ScrnInfoPtr scrn, int flush)
/* Save a ref to the last batch emitted, which we use for syncing
* in debug code.
*/
- dri_bo_unreference(intel->last_batch_bo);
- intel->last_batch_bo = intel->batch_bo;
- intel->batch_bo = NULL;
+ dri_bo_unreference(batch->last_batch_bo);
+ batch->last_batch_bo = batch->batch_bo;
+ batch->batch_bo = NULL;
- intel_next_batch(scrn);
+ intel_next_batch(scrn, batch_idx);
if (intel->debug_flush & DEBUG_FLUSH_WAIT)
- intel_batch_wait_last(scrn);
+ intel_batch_wait_last(scrn, batch_idx);
if (intel->batch_flush_notify)
- intel->batch_flush_notify(scrn);
+ intel->batch_flush_notify(scrn, batch_idx);
}
/** Waits on the last emitted batchbuffer to be completed. */
-void intel_batch_wait_last(ScrnInfoPtr scrn)
+void intel_batch_wait_last(ScrnInfoPtr scrn, int batch_idx)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
+ struct batch *batch = &intel->batch[batch_idx];
/* Map it CPU write, which guarantees it's done. This is a completely
* non performance path, so we don't need anything better.
*/
- drm_intel_gem_bo_map_gtt(intel->last_batch_bo);
- drm_intel_gem_bo_unmap_gtt(intel->last_batch_bo);
+ drm_intel_gem_bo_map_gtt(batch->last_batch_bo);
+ drm_intel_gem_bo_unmap_gtt(batch->last_batch_bo);
}
-void intel_debug_flush(ScrnInfoPtr scrn)
+void intel_debug_flush(ScrnInfoPtr scrn, int batch_idx)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
if (intel->debug_flush & DEBUG_FLUSH_CACHES)
- intel_batch_emit_flush(scrn);
+ intel_batch_emit_flush(scrn, batch_idx);
if (intel->debug_flush & DEBUG_FLUSH_BATCHES)
- intel_batch_submit(scrn, FALSE);
+ intel_batch_submit(scrn, FALSE, batch_idx);
}
+
diff --git a/src/intel_batchbuffer.h b/src/intel_batchbuffer.h
index bf7a5d9..1ed3ad8 100644
--- a/src/intel_batchbuffer.h
+++ b/src/intel_batchbuffer.h
@@ -35,14 +35,15 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
void intel_batch_init(ScrnInfoPtr scrn);
void intel_batch_teardown(ScrnInfoPtr scrn);
-void intel_batch_emit_flush(ScrnInfoPtr scrn);
-void intel_batch_do_flush(ScrnInfoPtr scrn);
-void intel_batch_submit(ScrnInfoPtr scrn, int flush);
-void intel_batch_wait_last(ScrnInfoPtr scrn);
+void intel_batch_emit_flush(ScrnInfoPtr scrn, int batch_idx);
+void intel_batch_do_flush(ScrnInfoPtr scrn, int batch_idx);
+void intel_batch_submit(ScrnInfoPtr scrn, int flush, int batch_idx);
+void intel_batch_wait_last(ScrnInfoPtr scrn, int batch_idx);
-static inline int intel_batch_space(intel_screen_private *intel)
+static inline int intel_batch_space(intel_screen_private *intel, int batch_idx)
{
- return (intel->batch_bo->size - BATCH_RESERVED) - (4*intel->batch_used);
+ return (intel->batch[batch_idx].batch_bo->size - BATCH_RESERVED) -
+ (4*intel->batch[batch_idx].batch_used);
}
static inline int intel_vertex_space(intel_screen_private *intel)
@@ -51,49 +52,50 @@ static inline int intel_vertex_space(intel_screen_private *intel)
}
static inline void
-intel_batch_require_space(ScrnInfoPtr scrn, intel_screen_private *intel, unsigned int sz)
+intel_batch_require_space(ScrnInfoPtr scrn, intel_screen_private *intel, unsigned int sz, int batch_idx)
{
- assert(sz < intel->batch_bo->size - 8);
- if (intel_batch_space(intel) < sz)
- intel_batch_submit(scrn, FALSE);
+ assert(sz < intel->batch[batch_idx].batch_bo->size - 8);
+ if (intel_batch_space(intel, batch_idx) < sz)
+ intel_batch_submit(scrn, FALSE, batch_idx);
}
-static inline void intel_batch_start_atomic(ScrnInfoPtr scrn, unsigned int sz)
+static inline void intel_batch_start_atomic(ScrnInfoPtr scrn, unsigned int sz, int batch_idx)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
- assert(!intel->in_batch_atomic);
- intel_batch_require_space(scrn, intel, sz * 4);
+ assert(!intel->batch[batch_idx].in_batch_atomic);
+ intel_batch_require_space(scrn, intel, sz * 4, batch_idx);
- intel->in_batch_atomic = TRUE;
- intel->batch_atomic_limit = intel->batch_used + sz;
+ intel->batch[batch_idx].in_batch_atomic = TRUE;
+ intel->batch[batch_idx].batch_atomic_limit = intel->batch[batch_idx].batch_used + sz;
}
-static inline void intel_batch_end_atomic(ScrnInfoPtr scrn)
+static inline void intel_batch_end_atomic(ScrnInfoPtr scrn, int batch_idx)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
-
- assert(intel->in_batch_atomic);
- assert(intel->batch_used <= intel->batch_atomic_limit);
- intel->in_batch_atomic = FALSE;
+ struct batch *batch = &intel->batch[batch_idx];
+ assert(batch->in_batch_atomic);
+ assert(batch->batch_used <= batch->batch_atomic_limit);
+ batch->in_batch_atomic = FALSE;
}
-static inline void intel_batch_emit_dword(intel_screen_private *intel, uint32_t dword)
+static inline void intel_batch_emit_dword(intel_screen_private *intel, uint32_t dword, int batch_idx)
{
- intel->batch_ptr[intel->batch_used++] = dword;
+ struct batch *batch = &intel->batch[batch_idx];
+ batch->batch_ptr[batch->batch_used++] = dword;
}
-static inline void intel_batch_align(intel_screen_private *intel, uint32_t align)
+static inline void intel_batch_align(intel_screen_private *intel, uint32_t align, int batch_idx)
{
uint32_t delta;
-
+ struct batch *batch = &intel->batch[batch_idx];
align /= 4;
assert(align);
- if ((delta = intel->batch_used & (align - 1))) {
+ if ((delta = batch->batch_used & (align - 1))) {
delta = align - delta;
- memset (intel->batch_ptr + intel->batch_used, 0, 4*delta);
- intel->batch_used += delta;
+ memset (batch->batch_ptr + batch->batch_used, 0, 4*delta);
+ batch->batch_used += delta;
}
}
@@ -101,33 +103,38 @@ static inline void
intel_batch_emit_reloc(intel_screen_private *intel,
dri_bo * bo,
uint32_t read_domains,
- uint32_t write_domains, uint32_t delta, int needs_fence)
+ uint32_t write_domains, uint32_t delta, int needs_fence,
+ int batch_idx)
{
+
+ struct batch *batch = &intel->batch[batch_idx];
if (needs_fence)
- drm_intel_bo_emit_reloc_fence(intel->batch_bo,
- intel->batch_used * 4,
+ drm_intel_bo_emit_reloc_fence(batch->batch_bo,
+ batch->batch_used * 4,
bo, delta,
read_domains, write_domains);
else
- drm_intel_bo_emit_reloc(intel->batch_bo, intel->batch_used * 4,
+ drm_intel_bo_emit_reloc(batch->batch_bo, batch->batch_used * 4,
bo, delta,
read_domains, write_domains);
- intel_batch_emit_dword(intel, bo->offset + delta);
+ intel_batch_emit_dword(intel, bo->offset + delta, batch_idx);
}
static inline void
intel_batch_mark_pixmap_domains(intel_screen_private *intel,
struct intel_pixmap *priv,
- uint32_t read_domains, uint32_t write_domain)
+ uint32_t read_domains, uint32_t write_domain,
+ int batch_idx)
{
+ struct batch *batch = &intel->batch[batch_idx];
assert (read_domains);
assert (write_domain == 0 || write_domain == read_domains);
if (list_is_empty(&priv->batch))
- list_add(&priv->batch, &intel->batch_pixmaps);
+ list_add(&priv->batch, &batch->batch_pixmaps);
if (write_domain && list_is_empty(&priv->flush))
- list_add(&priv->flush, &intel->flush_pixmaps);
+ list_add(&priv->flush, &batch->flush_pixmaps);
priv->batch_write |= write_domain != 0;
priv->busy = 1;
@@ -136,31 +143,32 @@ intel_batch_mark_pixmap_domains(intel_screen_private *intel,
static inline void
intel_batch_emit_reloc_pixmap(intel_screen_private *intel, PixmapPtr pixmap,
uint32_t read_domains, uint32_t write_domain,
- uint32_t delta, int needs_fence)
+ uint32_t delta, int needs_fence, int batch_idx)
{
struct intel_pixmap *priv = intel_get_pixmap_private(pixmap);
- intel_batch_mark_pixmap_domains(intel, priv, read_domains, write_domain);
+ intel_batch_mark_pixmap_domains(intel, priv, read_domains,
+ write_domain, batch_idx);
intel_batch_emit_reloc(intel, priv->bo,
read_domains, write_domain,
- delta, needs_fence);
+ delta, needs_fence, batch_idx);
}
-#define ALIGN_BATCH(align) intel_batch_align(intel, align);
-#define OUT_BATCH(dword) intel_batch_emit_dword(intel, dword)
+#define ALIGN_BATCH(align) intel_batch_align(intel, align, RENDER_BATCH);
+#define OUT_BATCH(dword) intel_batch_emit_dword(intel, dword, RENDER_BATCH)
#define OUT_RELOC(bo, read_domains, write_domains, delta) \
- intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 0)
+ intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 0,RENDER_BATCH)
#define OUT_RELOC_FENCED(bo, read_domains, write_domains, delta) \
- intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 1)
+ intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 1,RENDER_BATCH)
#define OUT_RELOC_PIXMAP(pixmap, reads, write, delta) \
- intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 0)
+ intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 0, RENDER_BATCH)
#define OUT_RELOC_PIXMAP_FENCED(pixmap, reads, write, delta) \
- intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 1)
+ intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 1, RENDER_BATCH)
union intfloat {
float f;
@@ -175,32 +183,34 @@ union intfloat {
#define BEGIN_BATCH(n) \
do { \
- if (intel->batch_emitting != 0) \
+ struct batch *batch = &intel->batch[RENDER_BATCH]; \
+ if (batch->batch_emitting != 0) \
FatalError("%s: BEGIN_BATCH called without closing " \
"ADVANCE_BATCH\n", __FUNCTION__); \
- assert(!intel->in_batch_atomic); \
- intel_batch_require_space(scrn, intel, (n) * 4); \
- intel->batch_emitting = (n); \
- intel->batch_emit_start = intel->batch_used; \
+ assert(!batch->in_batch_atomic); \
+ intel_batch_require_space(scrn, intel, (n) * 4, RENDER_BATCH); \
+ batch->batch_emitting = (n); \
+ batch->batch_emit_start = batch->batch_used; \
} while (0)
#define ADVANCE_BATCH() do { \
- if (intel->batch_emitting == 0) \
+ struct batch *batch = &intel->batch[RENDER_BATCH]; \
+ if (batch->batch_emitting == 0) \
FatalError("%s: ADVANCE_BATCH called with no matching " \
"BEGIN_BATCH\n", __FUNCTION__); \
- if (intel->batch_used > \
- intel->batch_emit_start + intel->batch_emitting) \
+ if (batch->batch_used > \
+ batch->batch_emit_start + batch->batch_emitting) \
FatalError("%s: ADVANCE_BATCH: exceeded allocation %d/%d\n ", \
__FUNCTION__, \
- intel->batch_used - intel->batch_emit_start, \
- intel->batch_emitting); \
- if (intel->batch_used < intel->batch_emit_start + \
- intel->batch_emitting) \
+ batch->batch_used - batch->batch_emit_start, \
+ batch->batch_emitting); \
+ if (batch->batch_used < batch->batch_emit_start + \
+ batch->batch_emitting) \
FatalError("%s: ADVANCE_BATCH: under-used allocation %d/%d\n ", \
__FUNCTION__, \
- intel->batch_used - intel->batch_emit_start, \
- intel->batch_emitting); \
- intel->batch_emitting = 0; \
+ batch->batch_used - batch->batch_emit_start, \
+ batch->batch_emitting); \
+ batch->batch_emitting = 0; \
} while (0)
void intel_next_vertex(intel_screen_private *intel);
diff --git a/src/intel_display.c b/src/intel_display.c
index d32224e..78f83aa 100644
--- a/src/intel_display.c
+++ b/src/intel_display.c
@@ -552,7 +552,7 @@ intel_crtc_shadow_create(xf86CrtcPtr crtc, void *data, int width, int height)
return NULL;
}
- intel_set_pixmap_bo(rotate_pixmap, intel_crtc->rotate_bo);
+ intel_set_pixmap_bo(rotate_pixmap, intel_crtc->rotate_bo, RENDER_BATCH);
intel->shadow_present = TRUE;
@@ -568,7 +568,7 @@ intel_crtc_shadow_destroy(xf86CrtcPtr crtc, PixmapPtr rotate_pixmap, void *data)
struct intel_mode *mode = intel_crtc->mode;
if (rotate_pixmap) {
- intel_set_pixmap_bo(rotate_pixmap, NULL);
+ intel_set_pixmap_bo(rotate_pixmap, NULL, RENDER_BATCH);
FreeScratchPixmapHeader(rotate_pixmap);
}
diff --git a/src/intel_dri.c b/src/intel_dri.c
index 67f7be9..b4876c8 100644
--- a/src/intel_dri.c
+++ b/src/intel_dri.c
@@ -93,7 +93,8 @@ static PixmapPtr get_front_buffer(DrawablePtr drawable)
intel->front_pitch,
intel->front_buffer->virtual);
- intel_set_pixmap_bo(pixmap, intel->front_buffer);
+ intel_set_pixmap_bo(pixmap, intel->front_buffer,
+ RENDER_BATCH);
intel_get_pixmap_private(pixmap)->offscreen = 0;
if (WindowDrawable(drawable->type))
screen->SetWindowPixmap((WindowPtr)drawable,
diff --git a/src/intel_driver.c b/src/intel_driver.c
index 7e4a4a4..b9fb69d 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -431,9 +431,15 @@ static int intel_init_bufmgr(intel_screen_private *intel)
drm_intel_bufmgr_gem_enable_reuse(intel->bufmgr);
drm_intel_bufmgr_gem_enable_fenced_relocs(intel->bufmgr);
- list_init(&intel->batch_pixmaps);
- list_init(&intel->flush_pixmaps);
- list_init(&intel->in_flight);
+ list_init(&intel->batch[RENDER_BATCH].batch_pixmaps);
+ list_init(&intel->batch[RENDER_BATCH].flush_pixmaps);
+ list_init(&intel->batch[RENDER_BATCH].in_flight);
+
+ if (IS_GEN6(intel)) {
+ list_init(&intel->batch[BLT_BATCH].batch_pixmaps);
+ list_init(&intel->batch[BLT_BATCH].flush_pixmaps);
+ list_init(&intel->batch[BLT_BATCH].in_flight);
+ }
return TRUE;
}
@@ -801,8 +807,8 @@ intel_flush_callback(CallbackListPtr *list,
* framebuffer until significantly later.
*/
intel_batch_submit(scrn,
- intel->need_mi_flush ||
- !list_is_empty(&intel->flush_pixmaps));
+ intel->batch[RENDER_BATCH].need_mi_flush
+ ||!list_is_empty(&intel->batch[RENDER_BATCH].flush_pixmaps), RENDER_BATCH);
}
}
@@ -1220,7 +1226,7 @@ static Bool I830CloseScreen(int scrnIndex, ScreenPtr screen)
if (intel->front_buffer) {
if (!intel->use_shadow)
intel_set_pixmap_bo(screen->GetScreenPixmap(screen),
- NULL);
+ NULL, RENDER_BATCH);
intel_mode_remove_fb(intel);
drm_intel_bo_unreference(intel->front_buffer);
intel->front_buffer = NULL;
diff --git a/src/intel_uxa.c b/src/intel_uxa.c
index 14c47a0..05ac3d2 100644
--- a/src/intel_uxa.c
+++ b/src/intel_uxa.c
@@ -106,21 +106,21 @@ ironlake_blt_workaround(ScrnInfoPtr scrn)
Bool
intel_get_aperture_space(ScrnInfoPtr scrn, drm_intel_bo ** bo_table,
- int num_bos)
+ int num_bos, int batch_idx)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
+ struct batch *batch = &intel->batch[batch_idx];
- if (intel->batch_bo == NULL) {
+ if (batch->batch_bo == NULL) {
intel_debug_fallback(scrn, "VT inactive\n");
return FALSE;
}
- bo_table[0] = intel->batch_bo;
+ bo_table[0] = batch->batch_bo;
if (drm_intel_bufmgr_check_aperture_space(bo_table, num_bos) != 0) {
- intel_batch_submit(scrn, FALSE);
- bo_table[0] = intel->batch_bo;
- if (drm_intel_bufmgr_check_aperture_space(bo_table, num_bos) !=
- 0) {
+ intel_batch_submit(scrn, FALSE, batch_idx);
+ bo_table[0] = batch->batch_bo;
+ if (drm_intel_bufmgr_check_aperture_space(bo_table, num_bos) != 0) {
intel_debug_fallback(scrn, "Couldn't get aperture "
"space for BOs\n");
return FALSE;
@@ -252,7 +252,8 @@ i830_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg)
if (!intel_check_pitch_2d(pixmap))
return FALSE;
- if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table)))
+ if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table),
+ RENDER_BATCH))
return FALSE;
intel->BR[13] = (I830PatternROP[alu] & 0xff) << 16;
@@ -327,7 +328,7 @@ static void i830_uxa_done_solid(PixmapPtr pixmap)
{
ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
- intel_debug_flush(scrn);
+ intel_debug_flush(scrn, RENDER_BATCH);
}
/**
@@ -385,7 +386,8 @@ i830_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir,
intel_get_pixmap_bo(dest),
};
- if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table)))
+ if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table),
+ RENDER_BATCH))
return FALSE;
intel->render_source = source;
@@ -496,7 +498,7 @@ static void i830_uxa_done_copy(PixmapPtr dest)
{
ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
- intel_debug_flush(scrn);
+ intel_debug_flush(scrn, RENDER_BATCH);
}
/**
@@ -512,7 +514,7 @@ static void i830_done_composite(PixmapPtr dest)
if (intel->vertex_flush)
intel->vertex_flush(intel);
- intel_debug_flush(scrn);
+ intel_debug_flush(scrn, RENDER_BATCH);
}
#define xFixedToFloat(val) \
@@ -609,7 +611,7 @@ dri_bo *intel_get_pixmap_bo(PixmapPtr pixmap)
return intel->bo;
}
-void intel_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo)
+void intel_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo, int batch_idx)
{
ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
intel_screen_private *intel = intel_get_screen_private(scrn);
@@ -631,7 +633,7 @@ void intel_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo)
list_del(&priv->batch);
list_del(&priv->flush);
} else {
- list_add(&priv->in_flight, &intel->in_flight);
+ list_add(&priv->in_flight, &intel->batch[batch_idx].in_flight);
priv = NULL;
}
@@ -695,7 +697,7 @@ static Bool intel_uxa_prepare_access(PixmapPtr pixmap, uxa_access_t access)
if (!list_is_empty(&priv->batch) &&
(access == UXA_ACCESS_RW || priv->batch_write))
- intel_batch_submit(scrn, FALSE);
+ intel_batch_submit(scrn, FALSE, RENDER_BATCH);
if (priv->tiling || bo->size <= intel->max_gtt_map_size)
ret = drm_intel_gem_bo_map_gtt(bo);
@@ -788,7 +790,7 @@ static Bool intel_uxa_put_image(PixmapPtr pixmap,
w, h,
0, 0,
stride, NULL);
- intel_set_pixmap_bo(pixmap, bo);
+ intel_set_pixmap_bo(pixmap, bo, RENDER_BATCH);
dri_bo_unreference(bo);
return intel_uxa_pixmap_put_image(pixmap, src, src_pitch, 0, 0, w, h);
@@ -909,7 +911,7 @@ static Bool intel_uxa_get_image(PixmapPtr pixmap,
FreeScratchGC(gc);
- intel_batch_submit(xf86Screens[screen->myNum], FALSE);
+ intel_batch_submit(xf86Screens[screen->myNum], FALSE, RENDER_BATCH);
x = y = 0;
pixmap = scratch;
@@ -932,7 +934,7 @@ void intel_uxa_block_handler(intel_screen_private *intel)
* and beyond rendering results may not hit the
* framebuffer until significantly later.
*/
- intel_batch_submit(intel->scrn, TRUE);
+ intel_batch_submit(intel->scrn, TRUE, RENDER_BATCH);
DamageEmpty(intel->shadow_damage);
}
@@ -965,6 +967,8 @@ intel_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
unsigned int size, tiling;
int stride;
+ struct batch *batch = &intel->batch[RENDER_BATCH];
+
/* Always attempt to tile, compute_size() will remove the
* tiling for pixmaps that are either too large or too small
* to be effectively tiled.
@@ -1008,7 +1012,7 @@ intel_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
aligned_h = ALIGN(h, 2);
list_foreach_entry(priv, struct intel_pixmap,
- &intel->in_flight,
+ &batch->in_flight,
in_flight) {
if (priv->tiling != tiling)
continue;
@@ -1074,7 +1078,7 @@ intel_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
static Bool intel_uxa_destroy_pixmap(PixmapPtr pixmap)
{
if (pixmap->refcnt == 1)
- intel_set_pixmap_bo(pixmap, NULL);
+ intel_set_pixmap_bo(pixmap, NULL, RENDER_BATCH);
fbDestroyPixmap(pixmap);
return TRUE;
}
@@ -1091,7 +1095,7 @@ void intel_uxa_create_screen_resources(ScreenPtr screen)
intel_shadow_create(intel);
} else {
PixmapPtr pixmap = screen->GetScreenPixmap(screen);
- intel_set_pixmap_bo(pixmap, bo);
+ intel_set_pixmap_bo(pixmap, bo, RENDER_BATCH);
intel_get_pixmap_private(pixmap)->busy = 1;
screen->ModifyPixmapHeader(pixmap,
scrn->virtualX,
--
1.7.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/2] use BLT command to accelerate uxa on gen6.
2010-10-26 7:33 [PATCH 1/2] split render engine batch buffer and BLT engine Zou Nan hai
@ 2010-10-26 7:33 ` Zou Nan hai
2010-10-26 8:17 ` [PATCH 1/2] split render engine batch buffer and BLT engine Chris Wilson
1 sibling, 0 replies; 9+ messages in thread
From: Zou Nan hai @ 2010-10-26 7:33 UTC (permalink / raw)
To: intel-gfx
uxa: enable acceleration for uxa_copy and uxa_solid
on gen6.
Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
---
src/i830_reg.h | 2 +
src/intel_batchbuffer.c | 35 ++++++--
src/intel_batchbuffer.h | 31 ++++++-
src/intel_driver.c | 3 +-
src/intel_uxa.c | 230 +++++++++++++++++++++++++++++++++++++++--------
5 files changed, 248 insertions(+), 53 deletions(-)
diff --git a/src/i830_reg.h b/src/i830_reg.h
index 4080896..93d03cf 100644
--- a/src/i830_reg.h
+++ b/src/i830_reg.h
@@ -32,6 +32,8 @@
/* Flush */
#define MI_FLUSH (0x04<<23)
+#define MI_FLUSH_DW (0x26<<23)
+
#define MI_WRITE_DIRTY_STATE (1<<4)
#define MI_END_SCENE (1<<3)
#define MI_GLOBAL_SNAPSHOT_COUNT_RESET (1<<3)
diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
index 3b72ba1..cde086b 100644
--- a/src/intel_batchbuffer.c
+++ b/src/intel_batchbuffer.c
@@ -171,6 +171,12 @@ void intel_batch_emit_flush(ScrnInfoPtr scrn, int batch_idx)
intel_batch_do_flush(scrn, batch_idx);
} else {
+ BEGIN_BATCH_BLT(4);
+ OUT_BATCH_BLT(MI_FLUSH_DW | 2);
+ OUT_BATCH_BLT(0);
+ OUT_BATCH_BLT(0);
+ OUT_BATCH_BLT(0);
+ ADVANCE_BATCH_BLT();
}
}
@@ -193,13 +199,22 @@ void intel_batch_submit(ScrnInfoPtr scrn, int flush, int batch_idx)
if (batch->batch_used == 0)
return;
+
+ if (batch_idx == RENDER_BATCH) {
+ /* Mark the end of the batchbuffer. */
+ OUT_BATCH(MI_BATCH_BUFFER_END);
+ /* Emit a padding dword if we aren't going to be quad-word aligned. */
+ if (batch->batch_used & 1)
+ OUT_BATCH(MI_NOOP);
+ } else {
+ /* Mark the end of the batchbuffer. */
+ OUT_BATCH_BLT(MI_BATCH_BUFFER_END);
+ /* Emit a padding dword if we aren't going to be quad-word aligned. */
+ if (batch->batch_used & 1)
+ OUT_BATCH_BLT(MI_NOOP);
+ }
- /* Mark the end of the batchbuffer. */
- OUT_BATCH(MI_BATCH_BUFFER_END);
- /* Emit a padding dword if we aren't going to be quad-word aligned. */
- if (batch->batch_used & 1)
- OUT_BATCH(MI_NOOP);
-
+
if (DUMP_BATCHBUFFERS) {
FILE *file = fopen(DUMP_BATCHBUFFERS, "a");
if (file) {
@@ -211,9 +226,13 @@ void intel_batch_submit(ScrnInfoPtr scrn, int flush, int batch_idx)
ret = dri_bo_subdata(batch->batch_bo, 0, batch->batch_used*4, batch->batch_ptr);
if (ret == 0) {
if (batch_idx == RENDER_BATCH) {
- ret = dri_bo_exec(batch->batch_bo, batch->batch_used*4,
- NULL, 0, 0xffffffff);
+ ret = drm_intel_bo_mrb_exec(batch->batch_bo,
+ batch->batch_used*4,
+ NULL, 0, 0xffffffff, I915_EXEC_RENDER);
} else {
+ ret = drm_intel_bo_mrb_exec(batch->batch_bo,
+ batch->batch_used*4,
+ NULL, 0, 0xffffffff, I915_EXEC_BLIT);
}
}
if (ret != 0) {
diff --git a/src/intel_batchbuffer.h b/src/intel_batchbuffer.h
index 1ed3ad8..6d1ee15 100644
--- a/src/intel_batchbuffer.h
+++ b/src/intel_batchbuffer.h
@@ -156,20 +156,35 @@ intel_batch_emit_reloc_pixmap(intel_screen_private *intel, PixmapPtr pixmap,
}
#define ALIGN_BATCH(align) intel_batch_align(intel, align, RENDER_BATCH);
+#define ALIGN_BATCH_BLT(align) intel_batch_align(intel, align, BLT_BATCH);
+
#define OUT_BATCH(dword) intel_batch_emit_dword(intel, dword, RENDER_BATCH)
+#define OUT_BATCH_BLT(dword) intel_batch_emit_dword(intel, dword, BLT_BATCH)
#define OUT_RELOC(bo, read_domains, write_domains, delta) \
intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 0,RENDER_BATCH)
+#define OUT_RELOC_BLT(bo, read_domains, write_domains, delta) \
+ intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 0,BLT_BATCH)
+
#define OUT_RELOC_FENCED(bo, read_domains, write_domains, delta) \
intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 1,RENDER_BATCH)
+#define OUT_RELOC_FENCED_BLT(bo, read_domains, write_domains, delta) \
+ intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 1,BLT_BATCH)
+
#define OUT_RELOC_PIXMAP(pixmap, reads, write, delta) \
intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 0, RENDER_BATCH)
+#define OUT_RELOC_PIXMAP_BLT(pixmap, reads, write, delta) \
+ intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 0, BLT_BATCH)
+
#define OUT_RELOC_PIXMAP_FENCED(pixmap, reads, write, delta) \
intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 1, RENDER_BATCH)
+#define OUT_RELOC_PIXMAP_FENCED_BLT(pixmap, reads, write, delta) \
+ intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 1, BLT_BATCH)
+
union intfloat {
float f;
unsigned int ui;
@@ -181,20 +196,23 @@ union intfloat {
OUT_BATCH(tmp.ui); \
} while(0)
-#define BEGIN_BATCH(n) \
+#define __BEGIN_BATCH(n,batch_idx) \
do { \
- struct batch *batch = &intel->batch[RENDER_BATCH]; \
+ struct batch *batch = &intel->batch[batch_idx]; \
if (batch->batch_emitting != 0) \
FatalError("%s: BEGIN_BATCH called without closing " \
"ADVANCE_BATCH\n", __FUNCTION__); \
assert(!batch->in_batch_atomic); \
- intel_batch_require_space(scrn, intel, (n) * 4, RENDER_BATCH); \
+ intel_batch_require_space(scrn, intel, (n) * 4, batch_idx); \
batch->batch_emitting = (n); \
batch->batch_emit_start = batch->batch_used; \
} while (0)
-#define ADVANCE_BATCH() do { \
- struct batch *batch = &intel->batch[RENDER_BATCH]; \
+#define BEGIN_BATCH(n) __BEGIN_BATCH(n,RENDER_BATCH)
+#define BEGIN_BATCH_BLT(n) __BEGIN_BATCH(n,BLT_BATCH)
+
+#define __ADVANCE_BATCH(batch_idx) do { \
+ struct batch *batch = &intel->batch[batch_idx]; \
if (batch->batch_emitting == 0) \
FatalError("%s: ADVANCE_BATCH called with no matching " \
"BEGIN_BATCH\n", __FUNCTION__); \
@@ -213,6 +231,9 @@ do { \
batch->batch_emitting = 0; \
} while (0)
+#define ADVANCE_BATCH(batch_idx) __ADVANCE_BATCH(RENDER_BATCH)
+#define ADVANCE_BATCH_BLT(batch_idx) __ADVANCE_BATCH(BLT_BATCH)
+
void intel_next_vertex(intel_screen_private *intel);
static inline void intel_vertex_emit(intel_screen_private *intel, float v)
{
diff --git a/src/intel_driver.c b/src/intel_driver.c
index b9fb69d..051497d 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -581,8 +581,6 @@ static Bool I830PreInit(ScrnInfoPtr scrn, int flags)
}
intel->use_shadow = FALSE;
- if (IS_GEN6(intel))
- intel->use_shadow = TRUE;
if (xf86IsOptionSet(intel->Options, OPTION_SHADOW)) {
intel->use_shadow =
@@ -809,6 +807,7 @@ intel_flush_callback(CallbackListPtr *list,
intel_batch_submit(scrn,
intel->batch[RENDER_BATCH].need_mi_flush
||!list_is_empty(&intel->batch[RENDER_BATCH].flush_pixmaps), RENDER_BATCH);
+
}
}
diff --git a/src/intel_uxa.c b/src/intel_uxa.c
index 05ac3d2..cbd87ca 100644
--- a/src/intel_uxa.c
+++ b/src/intel_uxa.c
@@ -208,17 +208,9 @@ intel_uxa_pixmap_compute_size(PixmapPtr pixmap,
}
static Bool
-i830_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask)
+generic_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask)
{
ScrnInfoPtr scrn = xf86Screens[drawable->pScreen->myNum];
- intel_screen_private *intel = intel_get_screen_private(scrn);
-
- if (IS_GEN6(intel)) {
- intel_debug_fallback(scrn,
- "Sandybridge BLT engine not supported\n");
- return FALSE;
- }
-
if (!UXA_PM_IS_SOLID(drawable, planemask)) {
intel_debug_fallback(scrn, "planemask is not solid\n");
return FALSE;
@@ -232,7 +224,6 @@ i830_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask)
default:
return FALSE;
}
-
return TRUE;
}
@@ -240,7 +231,7 @@ i830_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask)
* Sets up hardware state for a series of solid fills.
*/
static Bool
-i830_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg)
+generic_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg)
{
ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
intel_screen_private *intel = intel_get_screen_private(scrn);
@@ -252,10 +243,17 @@ i830_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg)
if (!intel_check_pitch_2d(pixmap))
return FALSE;
- if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table),
- RENDER_BATCH))
- return FALSE;
-
+ if (IS_GEN6(intel)) {
+ if (!intel_get_aperture_space(scrn, bo_table,
+ ARRAY_SIZE(bo_table),
+ BLT_BATCH))
+ return FALSE;
+ } else {
+ if (!intel_get_aperture_space(scrn, bo_table,
+ ARRAY_SIZE(bo_table),
+ RENDER_BATCH))
+ return FALSE;
+ }
intel->BR[13] = (I830PatternROP[alu] & 0xff) << 16;
switch (pixmap->drawable.bitsPerPixel) {
case 8:
@@ -274,6 +272,52 @@ i830_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg)
return TRUE;
}
+static void gen6_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2)
+{
+ ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+ unsigned long pitch;
+ uint32_t cmd;
+ if (x1 < 0)
+ x1 = 0;
+ if (y1 < 0)
+ y1 = 0;
+ if (x2 > pixmap->drawable.width)
+ x2 = pixmap->drawable.width;
+ if (y2 > pixmap->drawable.height)
+ y2 = pixmap->drawable.height;
+
+ if (x2 <= x1 || y2 <= y1)
+ return;
+
+ pitch = intel_pixmap_pitch(pixmap);
+ {
+ BEGIN_BATCH_BLT(6);
+
+ cmd = XY_COLOR_BLT_CMD;
+
+ if (pixmap->drawable.bitsPerPixel == 32)
+ cmd |=
+ XY_COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
+
+ if (intel_pixmap_tiled(pixmap)) {
+ assert((pitch % 512) == 0);
+ pitch >>= 2;
+ cmd |= XY_COLOR_BLT_TILED;
+ }
+
+ OUT_BATCH_BLT(cmd);
+
+ OUT_BATCH_BLT(intel->BR[13] | pitch);
+ OUT_BATCH_BLT((y1 << 16) | (x1 & 0xffff));
+ OUT_BATCH_BLT((y2 << 16) | (x2 & 0xffff));
+ OUT_RELOC_PIXMAP_FENCED_BLT(pixmap, I915_GEM_DOMAIN_RENDER,
+ 0, 0);
+ OUT_BATCH_BLT(intel->BR[16]);
+ ADVANCE_BATCH_BLT();
+ }
+}
+
static void i830_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2)
{
ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
@@ -324,10 +368,15 @@ static void i830_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2)
ironlake_blt_workaround(scrn);
}
-static void i830_uxa_done_solid(PixmapPtr pixmap)
+static void gen6_uxa_done_solid(PixmapPtr pixmap)
{
ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
+ intel_batch_submit(scrn, FALSE, BLT_BATCH);
+}
+static void i830_uxa_done_solid(PixmapPtr pixmap)
+{
+ ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
intel_debug_flush(scrn, RENDER_BATCH);
}
@@ -336,17 +385,10 @@ static void i830_uxa_done_solid(PixmapPtr pixmap)
* - support planemask using FULL_BLT_CMD?
*/
static Bool
-i830_uxa_check_copy(PixmapPtr source, PixmapPtr dest,
+generic_uxa_check_copy(PixmapPtr source, PixmapPtr dest,
int alu, Pixel planemask)
{
ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
- intel_screen_private *intel = intel_get_screen_private(scrn);
-
- if (IS_GEN6(intel)) {
- intel_debug_fallback(scrn,
- "Sandybridge BLT engine not supported\n");
- return FALSE;
- }
if (!UXA_PM_IS_SOLID(&source->drawable, planemask)) {
intel_debug_fallback(scrn, "planemask is not solid");
@@ -375,7 +417,7 @@ i830_uxa_check_copy(PixmapPtr source, PixmapPtr dest,
}
static Bool
-i830_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir,
+generic_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir,
int ydir, int alu, Pixel planemask)
{
ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
@@ -386,9 +428,18 @@ i830_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir,
intel_get_pixmap_bo(dest),
};
- if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table),
- RENDER_BATCH))
- return FALSE;
+
+ if (IS_GEN6(intel)) {
+ if (!intel_get_aperture_space(scrn, bo_table,
+ ARRAY_SIZE(bo_table),
+ BLT_BATCH))
+ return FALSE;
+ } else {
+ if (!intel_get_aperture_space(scrn, bo_table,
+ ARRAY_SIZE(bo_table),
+ RENDER_BATCH))
+ return FALSE;
+ }
intel->render_source = source;
@@ -408,6 +459,90 @@ i830_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir,
}
static void
+gen6_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1,
+ int dst_y1, int w, int h)
+{
+ ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+ uint32_t cmd;
+ int dst_x2, dst_y2, src_x2, src_y2;
+ unsigned int dst_pitch, src_pitch;
+
+ dst_x2 = dst_x1 + w;
+ dst_y2 = dst_y1 + h;
+
+ /* XXX Fixup extents as a lamentable workaround for missing
+ * source clipping in the upper layers.
+ */
+ if (dst_x1 < 0)
+ src_x1 -= dst_x1, dst_x1 = 0;
+ if (dst_y1 < 0)
+ src_y1 -= dst_y1, dst_y1 = 0;
+ if (dst_x2 > dest->drawable.width)
+ dst_x2 = dest->drawable.width;
+ if (dst_y2 > dest->drawable.height)
+ dst_y2 = dest->drawable.height;
+
+ src_x2 = src_x1 + (dst_x2 - dst_x1);
+ src_y2 = src_y1 + (dst_y2 - dst_y1);
+
+ if (src_x1 < 0)
+ dst_x1 -= src_x1, src_x1 = 0;
+ if (src_y1 < 0)
+ dst_y1 -= src_y1, src_y1 = 0;
+ if (src_x2 > intel->render_source->drawable.width)
+ dst_x2 -= src_x2 - intel->render_source->drawable.width;
+ if (src_y2 > intel->render_source->drawable.height)
+ dst_y2 -= src_y2 - intel->render_source->drawable.height;
+
+ if (dst_x2 <= dst_x1 || dst_y2 <= dst_y1)
+ return;
+
+ dst_pitch = intel_pixmap_pitch(dest);
+ src_pitch = intel_pixmap_pitch(intel->render_source);
+ {
+ BEGIN_BATCH_BLT(8);
+
+ cmd = XY_SRC_COPY_BLT_CMD;
+
+ if (dest->drawable.bitsPerPixel == 32)
+ cmd |=
+ XY_SRC_COPY_BLT_WRITE_ALPHA |
+ XY_SRC_COPY_BLT_WRITE_RGB;
+
+ if (INTEL_INFO(intel)->gen >= 40) {
+ if (intel_pixmap_tiled(dest)) {
+ assert((dst_pitch % 512) == 0);
+ dst_pitch >>= 2;
+ cmd |= XY_SRC_COPY_BLT_DST_TILED;
+ }
+
+ if (intel_pixmap_tiled(intel->render_source)) {
+ assert((src_pitch % 512) == 0);
+ src_pitch >>= 2;
+ cmd |= XY_SRC_COPY_BLT_SRC_TILED;
+ }
+ }
+
+ OUT_BATCH_BLT(cmd);
+
+ OUT_BATCH_BLT(intel->BR[13] | dst_pitch);
+ OUT_BATCH_BLT((dst_y1 << 16) | (dst_x1 & 0xffff));
+ OUT_BATCH_BLT((dst_y2 << 16) | (dst_x2 & 0xffff));
+ OUT_RELOC_PIXMAP_FENCED_BLT(dest,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER,
+ 0);
+ OUT_BATCH_BLT((src_y1 << 16) | (src_x1 & 0xffff));
+ OUT_BATCH_BLT(src_pitch);
+ OUT_RELOC_PIXMAP_FENCED_BLT(intel->render_source,
+ I915_GEM_DOMAIN_RENDER, 0,
+ 0);
+ ADVANCE_BATCH_BLT();
+ }
+}
+
+static void
i830_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1,
int dst_y1, int w, int h)
{
@@ -497,10 +632,16 @@ i830_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1,
static void i830_uxa_done_copy(PixmapPtr dest)
{
ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
+ intel_debug_flush(scrn, BLT_BATCH);
+}
- intel_debug_flush(scrn, RENDER_BATCH);
+static void gen6_uxa_done_copy(PixmapPtr dest)
+{
+ ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
+ intel_batch_submit(scrn, FALSE, BLT_BATCH);
}
+
/**
* Do any cleanup from the Composite operation.
*
@@ -1191,17 +1332,30 @@ Bool intel_uxa_init(ScreenPtr screen)
intel->vertex_bo = NULL;
/* Solid fill */
- intel->uxa_driver->check_solid = i830_uxa_check_solid;
- intel->uxa_driver->prepare_solid = i830_uxa_prepare_solid;
- intel->uxa_driver->solid = i830_uxa_solid;
- intel->uxa_driver->done_solid = i830_uxa_done_solid;
+ if (IS_GEN6(intel)) {
+ intel->uxa_driver->check_solid = generic_uxa_check_solid;
+ intel->uxa_driver->prepare_solid = generic_uxa_prepare_solid;
+ intel->uxa_driver->solid = gen6_uxa_solid;
+ intel->uxa_driver->done_solid = gen6_uxa_done_solid;
+ } else {
+ intel->uxa_driver->check_solid = generic_uxa_check_solid;
+ intel->uxa_driver->prepare_solid = generic_uxa_prepare_solid;
+ intel->uxa_driver->solid = i830_uxa_solid;
+ intel->uxa_driver->done_solid = i830_uxa_done_solid;
+ }
/* Copy */
- intel->uxa_driver->check_copy = i830_uxa_check_copy;
- intel->uxa_driver->prepare_copy = i830_uxa_prepare_copy;
- intel->uxa_driver->copy = i830_uxa_copy;
- intel->uxa_driver->done_copy = i830_uxa_done_copy;
-
+ if (IS_GEN6(intel)) {
+ intel->uxa_driver->check_copy = generic_uxa_check_copy;
+ intel->uxa_driver->prepare_copy = generic_uxa_prepare_copy;
+ intel->uxa_driver->copy = gen6_uxa_copy;
+ intel->uxa_driver->done_copy = gen6_uxa_done_copy;
+ } else {
+ intel->uxa_driver->check_copy = generic_uxa_check_copy;
+ intel->uxa_driver->prepare_copy = generic_uxa_prepare_copy;
+ intel->uxa_driver->copy = i830_uxa_copy;
+ intel->uxa_driver->done_copy = i830_uxa_done_copy;
+ }
/* Composite */
if (IS_GEN2(intel)) {
intel->uxa_driver->check_composite = i830_check_composite;
--
1.7.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] split render engine batch buffer and BLT engine
2010-10-26 7:33 [PATCH 1/2] split render engine batch buffer and BLT engine Zou Nan hai
2010-10-26 7:33 ` [PATCH 2/2] use BLT command to accelerate uxa on gen6 Zou Nan hai
@ 2010-10-26 8:17 ` Chris Wilson
2010-10-26 8:23 ` Zou, Nanhai
1 sibling, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2010-10-26 8:17 UTC (permalink / raw)
To: Zou Nan hai, intel-gfx
On Tue, 26 Oct 2010 15:33:15 +0800, Zou Nan hai <nanhai.zou@intel.com> wrote:
> intel: on gen6, BLT commands stay in a seperate BLT ring
> buffer. Split render engine batch and BLT engine batch
> on gen6.
No. The batch buffer needs to be modal, and upon a context switch flushes
the current batch. Otherwise we will not be able to synchronise rendering
between rings.
Plus, implementing that is about 20 lines.
-Chris
>--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] split render engine batch buffer and BLT engine
2010-10-26 8:17 ` [PATCH 1/2] split render engine batch buffer and BLT engine Chris Wilson
@ 2010-10-26 8:23 ` Zou, Nanhai
2010-10-26 8:31 ` Zou, Nanhai
2010-10-26 9:13 ` Chris Wilson
0 siblings, 2 replies; 9+ messages in thread
From: Zou, Nanhai @ 2010-10-26 8:23 UTC (permalink / raw)
To: Chris Wilson, intel-gfx@lists.freedesktop.org
>>-----Original Message-----
>>From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
>>Sent: 2010年10月26日 16:18
>>To: Zou, Nanhai; intel-gfx@lists.freedesktop.org
>>Subject: Re: [Intel-gfx] [PATCH 1/2] split render engine batch buffer and BLT
>>engine
>>
>>On Tue, 26 Oct 2010 15:33:15 +0800, Zou Nan hai <nanhai.zou@intel.com> wrote:
>>> intel: on gen6, BLT commands stay in a seperate BLT ring
>>> buffer. Split render engine batch and BLT engine batch
>>> on gen6.
>>
>>No. The batch buffer needs to be modal, and upon a context switch flushes
>>the current batch. Otherwise we will not be able to synchronise rendering
>>between rings.
>>
>>Plus, implementing that is about 20 lines.
>>-Chris
Hi,
I do not quite understand your point.
How can we mix blitter commands with render commands in a batch buffer?
Thanks
Zou nan hai
>>
>>>--
>>Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] split render engine batch buffer and BLT engine
2010-10-26 8:23 ` Zou, Nanhai
@ 2010-10-26 8:31 ` Zou, Nanhai
2010-10-26 8:37 ` Chris Wilson
2010-10-26 9:13 ` Chris Wilson
1 sibling, 1 reply; 9+ messages in thread
From: Zou, Nanhai @ 2010-10-26 8:31 UTC (permalink / raw)
To: Zou, Nanhai, Chris Wilson, intel-gfx@lists.freedesktop.org
>>-----Original Message-----
>>From: intel-gfx-bounces+nanhai.zou=intel.com@lists.freedesktop.org
>>[mailto:intel-gfx-bounces+nanhai.zou=intel.com@lists.freedesktop.org] On
>>Behalf Of Zou, Nanhai
>>Sent: 2010年10月26日 16:23
>>To: Chris Wilson; intel-gfx@lists.freedesktop.org
>>Subject: Re: [Intel-gfx] [PATCH 1/2] split render engine batch buffer and BLT
>>engine
>>
>>
>>>>-----Original Message-----
>>>>From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
>>>>Sent: 2010年10月26日 16:18
>>>>To: Zou, Nanhai; intel-gfx@lists.freedesktop.org
>>>>Subject: Re: [Intel-gfx] [PATCH 1/2] split render engine batch buffer and
>>BLT
>>>>engine
>>>>
>>>>On Tue, 26 Oct 2010 15:33:15 +0800, Zou Nan hai <nanhai.zou@intel.com> wrote:
>>>>> intel: on gen6, BLT commands stay in a seperate BLT ring
>>>>> buffer. Split render engine batch and BLT engine batch
>>>>> on gen6.
>>>>
>>>>No. The batch buffer needs to be modal, and upon a context switch flushes
>>>>the current batch. Otherwise we will not be able to synchronise rendering
>>>>between rings.
>>>>
>>>>Plus, implementing that is about 20 lines.
>>>>-Chris
>>Hi,
>> I do not quite understand your point.
>>How can we mix blitter command with render command in a batch buffer?
>>
>>Thanks
>>Zou nan hai
Even if we can implement the batch buffer in a modal way,
I do not think it is the best usage model.
The render engine and the BLT engine are separate engines on gen6+.
Running them one after the other will not maximize GPU usage.
Thanks
Zou Nan hai
>>>>
>>>>>--
>>>>Chris Wilson, Intel Open Source Technology Centre
>>_______________________________________________
>>Intel-gfx mailing list
>>Intel-gfx@lists.freedesktop.org
>>http://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] split render engine batch buffer and BLT engine
2010-10-26 8:31 ` Zou, Nanhai
@ 2010-10-26 8:37 ` Chris Wilson
2010-10-26 8:59 ` Zou, Nanhai
0 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2010-10-26 8:37 UTC (permalink / raw)
To: Zou, Nanhai
On Tue, 26 Oct 2010 16:31:47 +0800, "Zou, Nanhai" <nanhai.zou@intel.com> wrote:
> Even if we can implement the batch buffer in a modal way,
> I think it is not the best usage model.
> Render engine and BLT engine are separate engines on gen6+.
> Running them one after another will not maximize GPU usage.
What usage pattern are you designing for? The most common pattern I've
seen is for short intermixed operations on a connected set of buffers. So
just what is the average length of the concurrent batch buffers (with
correct inter-ring flushing) versus a much simpler modal approach?
And do you have any x11perf figures yet?
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] split render engine batch buffer and BLT engine
2010-10-26 8:37 ` Chris Wilson
@ 2010-10-26 8:59 ` Zou, Nanhai
0 siblings, 0 replies; 9+ messages in thread
From: Zou, Nanhai @ 2010-10-26 8:59 UTC (permalink / raw)
To: Chris Wilson, intel-gfx@lists.freedesktop.org
>>-----Original Message-----
>>From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
>>Sent: 2010年10月26日 16:38
>>To: Zou, Nanhai; Zou, Nanhai; intel-gfx@lists.freedesktop.org
>>Subject: RE: [Intel-gfx] [PATCH 1/2] split render engine batch buffer and BLT
>>engine
>>
>>On Tue, 26 Oct 2010 16:31:47 +0800, "Zou, Nanhai" <nanhai.zou@intel.com> wrote:
>>> Even if we can implement the batch buffer in a modal way,
>>> I think it is not the best usage model.
>>> Render engine and BLT engine are separate engines on gen6+.
>>> Running them one after another will not maximize GPU usage.
>>
>>What usage pattern are you designing for? The most common pattern I've
>>seen is for short intermixed operations on a connected set of buffers. So
>>just what is the average length of the concurrent batch buffers (with
>>correct inter-ring flushing) versus a much simpler modal approach?
>>
>>And do you have any x11perf figures yet?
>>-Chris
I don't have x11perf result yet.
We have also discussed the cache coherence design on gen6+.
There are 4 kinds of rings on gen6+ used by different engines, and the CPU and GPU now share the last-level cache. So we may need a better cache synchronization mechanism.
Thanks
Zou Nan hai
>>
>>--
>>Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] split render engine batch buffer and BLT engine
2010-10-26 8:23 ` Zou, Nanhai
2010-10-26 8:31 ` Zou, Nanhai
@ 2010-10-26 9:13 ` Chris Wilson
2010-10-27 0:33 ` Zou, Nanhai
1 sibling, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2010-10-26 9:13 UTC (permalink / raw)
To: Zou Nanhai, intel-gfx@lists.freedesktop.org
On Tue, 26 Oct 2010 16:23:24 +0800, "Zou, Nanhai" <nanhai.zou@intel.com> wrote:
> I do not quite understand your point.
> How can we mix blitter command with render command in a batch buffer?
We can't. We also can't mix render targets/sources between concurrent
batch buffers either, at the moment. (I'd much rather finish the kernel
synchronisation before teaching X how to synchronise a pair of
interdependent concurrent batch buffers). So in order to get any
synchronisation between the rings you have to flush the BLT batch when
starting a RENDER operation (if that operation accesses a BLT buffer and
vice versa). Hence why I think that a single, modal batch buffer will be
no less efficient than maintaining a pair (or more) concurrent batch
buffers. X's request model means that we will process many sequential ops
from a single client in a single pass which will intermix BLT/RENDER
commands (or just use RENDER predominantly). I do not foresee X attaining
any great level of parallelism between the rings.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] split render engine batch buffer and BLT engine
2010-10-26 9:13 ` Chris Wilson
@ 2010-10-27 0:33 ` Zou, Nanhai
0 siblings, 0 replies; 9+ messages in thread
From: Zou, Nanhai @ 2010-10-27 0:33 UTC (permalink / raw)
To: Chris Wilson, intel-gfx@lists.freedesktop.org
>>-----Original Message-----
>>From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
>>Sent: 2010年10月26日 17:13
>>To: Zou, Nanhai; intel-gfx@lists.freedesktop.org
>>Subject: RE: [Intel-gfx] [PATCH 1/2] split render engine batch buffer and BLT
>>engine
>>
>>On Tue, 26 Oct 2010 16:23:24 +0800, "Zou, Nanhai" <nanhai.zou@intel.com> wrote:
>>> I do not quite understand your point.
>>> How can we mix blitter command with render command in a batch buffer?
>>
>>We can't. We also can't mix render targets/sources between concurrent
>>batch buffers either, at the moment. (I'd much rather finish the kernel
>>synchronisation before teaching X how to synchronise a pair of
>>interdependent concurrent batch buffers). So in order to get any
>>synchronisation between the rings you have to flush the BLT batch when
>>starting a RENDER operation (if that operation accesses a BLT buffer and
>>vice versa). Hence why I think that a single, modal batch buffer will be
>>no less efficient than maintaining a pair (or more) concurrent batch
>>buffers. X's request model means that we will process many sequential ops
>>from a single client in a single pass which will intermix BLT/RENDER
>>commands (or just use RENDER predominantly). I do not foresee X attaining
>>any great level of parallelism between the rings.
>>-Chris
Well,
I think I get your point. We will rework our patch along those lines.
Thanks
Zou Nan hai
>>
>>--
>>Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2010-10-27 0:33 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-10-26 7:33 [PATCH 1/2] split render engine batch buffer and BLT engine Zou Nan hai
2010-10-26 7:33 ` [PATCH 2/2] use BLT command to accelerate uxa on gen6 Zou Nan hai
2010-10-26 8:17 ` [PATCH 1/2] split render engine batch buffer and BLT engine Chris Wilson
2010-10-26 8:23 ` Zou, Nanhai
2010-10-26 8:31 ` Zou, Nanhai
2010-10-26 8:37 ` Chris Wilson
2010-10-26 8:59 ` Zou, Nanhai
2010-10-26 9:13 ` Chris Wilson
2010-10-27 0:33 ` Zou, Nanhai
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.