* [PATH v2 1/6] Xv: set the surface state base address
2010-10-27 2:17 [PATH v2 0/6] Xv on Sandybridge Xiang, Haihao
@ 2010-10-27 2:17 ` Xiang, Haihao
2010-10-27 2:17 ` [PATH v2 2/6] Xv: Send instruction doesn't use implied move when sampling YUV surface Xiang, Haihao
` (4 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: Xiang, Haihao @ 2010-10-27 2:17 UTC (permalink / raw)
To: intel-gfx
This prepares for Xv on Sandybridge. Setting the surface state base
address makes it easy to fill the binding table without relocations and
ensures that the pointer to the binding table only uses bits[15:0].
Signed-off-by: Xiang, Haihao <haihao.xiang@intel.com>
---
src/i965_video.c | 141 +++++++++++++++++++++++++----------------------------
1 files changed, 67 insertions(+), 74 deletions(-)
diff --git a/src/i965_video.c b/src/i965_video.c
index 4ededde..aaf10fa 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -360,17 +360,20 @@ intel_alloc_and_map(intel_screen_private *intel, char *name, int size,
return 0;
}
-static drm_intel_bo *i965_create_dst_surface_state(ScrnInfoPtr scrn,
- PixmapPtr pixmap)
+static void i965_create_dst_surface_state(ScrnInfoPtr scrn,
+ PixmapPtr pixmap,
+ drm_intel_bo *surf_bo,
+ uint32_t offset)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
struct brw_surface_state *dest_surf_state;
drm_intel_bo *pixmap_bo = intel_get_pixmap_bo(pixmap);
- drm_intel_bo *surf_bo;
- if (intel_alloc_and_map(intel, "textured video surface state", 4096,
- &surf_bo, &dest_surf_state) != 0)
- return NULL;
+ if (drm_intel_bo_map(surf_bo, TRUE) != 0)
+ return;
+
+ dest_surf_state = (struct brw_surface_state *)((char *)surf_bo->virtual + offset);
+ memset(dest_surf_state, 0, sizeof(*dest_surf_state));
dest_surf_state->ss0.surface_type = BRW_SURFACE_2D;
dest_surf_state->ss0.data_return_format =
@@ -393,7 +396,7 @@ static drm_intel_bo *i965_create_dst_surface_state(ScrnInfoPtr scrn,
dest_surf_state->ss0.render_cache_read_mode = 0;
dest_surf_state->ss1.base_addr =
- intel_emit_reloc(surf_bo, offsetof(struct brw_surface_state, ss1),
+ intel_emit_reloc(surf_bo, offset + offsetof(struct brw_surface_state, ss1),
pixmap_bo, 0, I915_GEM_DOMAIN_SAMPLER, 0);
dest_surf_state->ss2.height = scrn->virtualY - 1;
@@ -405,24 +408,25 @@ static drm_intel_bo *i965_create_dst_surface_state(ScrnInfoPtr scrn,
dest_surf_state->ss3.tile_walk = 0; /* TileX */
drm_intel_bo_unmap(surf_bo);
- return surf_bo;
}
-static drm_intel_bo *i965_create_src_surface_state(ScrnInfoPtr scrn,
- drm_intel_bo * src_bo,
- uint32_t src_offset,
- int src_width,
- int src_height,
- int src_pitch,
- uint32_t src_surf_format)
+static void i965_create_src_surface_state(ScrnInfoPtr scrn,
+ drm_intel_bo * src_bo,
+ uint32_t src_offset,
+ int src_width,
+ int src_height,
+ int src_pitch,
+ uint32_t src_surf_format,
+ drm_intel_bo *surface_bo,
+ uint32_t offset)
{
- intel_screen_private *intel = intel_get_screen_private(scrn);
- drm_intel_bo *surface_bo;
struct brw_surface_state *src_surf_state;
- if (intel_alloc_and_map(intel, "textured video surface state", 4096,
- &surface_bo, &src_surf_state) != 0)
- return NULL;
+ if (drm_intel_bo_map(surface_bo, TRUE) != 0)
+ return;
+
+ src_surf_state = (struct brw_surface_state *)((char *)surface_bo->virtual + offset);
+ memset(src_surf_state, 0, sizeof(*src_surf_state));
/* Set up the source surface state buffer */
src_surf_state->ss0.surface_type = BRW_SURFACE_2D;
@@ -446,7 +450,7 @@ static drm_intel_bo *i965_create_src_surface_state(ScrnInfoPtr scrn,
if (src_bo) {
src_surf_state->ss1.base_addr =
intel_emit_reloc(surface_bo,
- offsetof(struct brw_surface_state, ss1),
+ offset + offsetof(struct brw_surface_state, ss1),
src_bo, src_offset,
I915_GEM_DOMAIN_SAMPLER, 0);
} else {
@@ -454,31 +458,25 @@ static drm_intel_bo *i965_create_src_surface_state(ScrnInfoPtr scrn,
}
drm_intel_bo_unmap(surface_bo);
- return surface_bo;
}
-static drm_intel_bo *i965_create_binding_table(ScrnInfoPtr scrn,
- drm_intel_bo ** surf_bos,
- int n_surf)
+static void i965_create_binding_table(ScrnInfoPtr scrn,
+ drm_intel_bo *bind_bo,
+ int n_surf)
{
- intel_screen_private *intel = intel_get_screen_private(scrn);
- drm_intel_bo *bind_bo;
uint32_t *binding_table;
int i;
/* Set up a binding table for our surfaces. Only the PS will use it */
+ if (drm_intel_bo_map(bind_bo, TRUE) != 0)
+ return;
- if (intel_alloc_and_map(intel, "textured video binding table", 4096,
- &bind_bo, &binding_table) != 0)
- return NULL;
+ binding_table = (uint32_t*)((char *)bind_bo->virtual + n_surf * ALIGN(sizeof(struct brw_surface_state), 32));
for (i = 0; i < n_surf; i++)
- binding_table[i] =
- intel_emit_reloc(bind_bo, i * sizeof(uint32_t), surf_bos[i],
- 0, I915_GEM_DOMAIN_INSTRUCTION, 0);
+ binding_table[i] = i * ALIGN(sizeof(struct brw_surface_state), 32);
drm_intel_bo_unmap(bind_bo);
- return bind_bo;
}
static drm_intel_bo *i965_create_sampler_state(ScrnInfoPtr scrn)
@@ -757,7 +755,7 @@ static drm_intel_bo *i965_create_cc_state(ScrnInfoPtr scrn)
}
static void
-i965_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo * bind_bo, int n_src_surf)
+i965_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo * surface_state_binding_table_bo, int n_src_surf)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
int urb_vs_start, urb_vs_size;
@@ -804,7 +802,7 @@ i965_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo * bind_bo, int n_src_surf)
if (IS_GEN5(intel)) {
OUT_BATCH(BRW_STATE_BASE_ADDRESS | 6);
OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */
- OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Surface state base address */
+ OUT_RELOC(surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */
OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Instruction base address */
/* general state max addr, disabled */
@@ -816,7 +814,7 @@ i965_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo * bind_bo, int n_src_surf)
} else {
OUT_BATCH(BRW_STATE_BASE_ADDRESS | 4);
OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */
- OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Surface state base address */
+ OUT_RELOC(surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */
/* general state max addr, disabled */
OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY);
@@ -850,7 +848,7 @@ i965_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo * bind_bo, int n_src_surf)
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
/* Only the PS uses the binding table */
- OUT_RELOC(bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0);
+ OUT_BATCH((n_src_surf + 1) * ALIGN(sizeof(struct brw_surface_state), 32));
/* Blend constant color (magenta is fun) */
OUT_BATCH(BRW_3DSTATE_CONSTANT_COLOR | 3);
@@ -975,14 +973,14 @@ I965DisplayVideoTextured(ScrnInfoPtr scrn,
BoxPtr pbox;
int nbox, dxo, dyo, pix_xoff, pix_yoff;
float src_scale_x, src_scale_y;
- int src_surf, i;
+ int src_surf;
int n_src_surf;
uint32_t src_surf_format;
uint32_t src_surf_base[6];
int src_width[6];
int src_height[6];
int src_pitch[6];
- drm_intel_bo *bind_bo, *surf_bos[7];
+ drm_intel_bo *surface_state_binding_table_bo;
#if 0
ErrorF("BroadwaterDisplayVideoTextured: %dx%d (pitch %d)\n", width,
@@ -1040,35 +1038,30 @@ I965DisplayVideoTextured(ScrnInfoPtr scrn,
* I830PutImage.
*/
- /* Upload kernels */
- surf_bos[0] = i965_create_dst_surface_state(scrn, pixmap);
- if (!surf_bos[0])
+ surface_state_binding_table_bo =
+ drm_intel_bo_alloc(intel->bufmgr,
+ "surface state & binding table",
+ (n_src_surf + 1) * (ALIGN(sizeof(struct brw_surface_state), 32) + sizeof(uint32_t)),
+ 4096);
+
+ if (!surface_state_binding_table_bo)
return;
+
+ i965_create_dst_surface_state(scrn, pixmap, surface_state_binding_table_bo, 0);
for (src_surf = 0; src_surf < n_src_surf; src_surf++) {
- drm_intel_bo *surf_bo =
- i965_create_src_surface_state(scrn,
- adaptor_priv->buf,
- src_surf_base[src_surf],
- src_width[src_surf],
- src_height[src_surf],
- src_pitch[src_surf],
- src_surf_format);
- if (!surf_bo) {
- int q;
- for (q = 0; q < src_surf + 1; q++)
- drm_intel_bo_unreference(surf_bos[q]);
- return;
- }
- surf_bos[src_surf + 1] = surf_bo;
- }
- bind_bo = i965_create_binding_table(scrn, surf_bos, n_src_surf + 1);
- for (i = 0; i < n_src_surf + 1; i++) {
- drm_intel_bo_unreference(surf_bos[i]);
- surf_bos[i] = NULL;
+ i965_create_src_surface_state(scrn,
+ adaptor_priv->buf,
+ src_surf_base[src_surf],
+ src_width[src_surf],
+ src_height[src_surf],
+ src_pitch[src_surf],
+ src_surf_format,
+ surface_state_binding_table_bo,
+ (src_surf + 1) * ALIGN(sizeof(struct brw_surface_state), 32));
}
- if (!bind_bo)
- return;
+
+ i965_create_binding_table(scrn, surface_state_binding_table_bo, n_src_surf + 1);
if (intel->video.gen4_sampler_bo == NULL)
intel->video.gen4_sampler_bo = i965_create_sampler_state(scrn);
@@ -1077,7 +1070,7 @@ I965DisplayVideoTextured(ScrnInfoPtr scrn,
i965_create_program(scrn, &sip_kernel_static[0][0],
sizeof(sip_kernel_static));
if (!intel->video.gen4_sip_kernel_bo) {
- drm_intel_bo_unreference(bind_bo);
+ drm_intel_bo_unreference(surface_state_binding_table_bo);
return;
}
}
@@ -1085,14 +1078,14 @@ I965DisplayVideoTextured(ScrnInfoPtr scrn,
if (intel->video.gen4_vs_bo == NULL) {
intel->video.gen4_vs_bo = i965_create_vs_state(scrn);
if (!intel->video.gen4_vs_bo) {
- drm_intel_bo_unreference(bind_bo);
+ drm_intel_bo_unreference(surface_state_binding_table_bo);
return;
}
}
if (intel->video.gen4_sf_bo == NULL) {
intel->video.gen4_sf_bo = i965_create_sf_state(scrn);
if (!intel->video.gen4_sf_bo) {
- drm_intel_bo_unreference(bind_bo);
+ drm_intel_bo_unreference(surface_state_binding_table_bo);
return;
}
}
@@ -1101,7 +1094,7 @@ I965DisplayVideoTextured(ScrnInfoPtr scrn,
i965_create_wm_state(scrn, intel->video.gen4_sampler_bo,
TRUE);
if (!intel->video.gen4_wm_packed_bo) {
- drm_intel_bo_unreference(bind_bo);
+ drm_intel_bo_unreference(surface_state_binding_table_bo);
return;
}
}
@@ -1111,7 +1104,7 @@ I965DisplayVideoTextured(ScrnInfoPtr scrn,
i965_create_wm_state(scrn, intel->video.gen4_sampler_bo,
FALSE);
if (!intel->video.gen4_wm_planar_bo) {
- drm_intel_bo_unreference(bind_bo);
+ drm_intel_bo_unreference(surface_state_binding_table_bo);
return;
}
}
@@ -1119,7 +1112,7 @@ I965DisplayVideoTextured(ScrnInfoPtr scrn,
if (intel->video.gen4_cc_bo == NULL) {
intel->video.gen4_cc_bo = i965_create_cc_state(scrn);
if (!intel->video.gen4_cc_bo) {
- drm_intel_bo_unreference(bind_bo);
+ drm_intel_bo_unreference(surface_state_binding_table_bo);
return;
}
}
@@ -1155,7 +1148,7 @@ I965DisplayVideoTextured(ScrnInfoPtr scrn,
drm_intel_bo *bo_table[] = {
NULL, /* vb_bo */
intel->batch_bo,
- bind_bo,
+ surface_state_binding_table_bo,
intel->video.gen4_sampler_bo,
intel->video.gen4_sip_kernel_bo,
intel->video.gen4_vs_bo,
@@ -1204,7 +1197,7 @@ I965DisplayVideoTextured(ScrnInfoPtr scrn,
intel_batch_start_atomic(scrn, 100);
- i965_emit_video_setup(scrn, bind_bo, n_src_surf);
+ i965_emit_video_setup(scrn, surface_state_binding_table_bo, n_src_surf);
/* Set up the pointer to our vertex buffer */
OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3);
@@ -1238,7 +1231,7 @@ I965DisplayVideoTextured(ScrnInfoPtr scrn,
}
/* release reference once we're finished */
- drm_intel_bo_unreference(bind_bo);
+ drm_intel_bo_unreference(surface_state_binding_table_bo);
intel_debug_flush(scrn);
}
--
1.7.0.4
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATH v2 2/6] Xv: Send instruction doesn't use implied move when sampling YUV surface
2010-10-27 2:17 [PATH v2 0/6] Xv on Sandybridge Xiang, Haihao
2010-10-27 2:17 ` [PATH v2 1/6] Xv: set the surface state base address Xiang, Haihao
@ 2010-10-27 2:17 ` Xiang, Haihao
2010-10-27 2:17 ` [PATH v2 3/6] Xv: fragments for xv on Sandybridge Xiang, Haihao
` (3 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: Xiang, Haihao @ 2010-10-27 2:17 UTC (permalink / raw)
To: intel-gfx
The two fragments will be reused for sampling YUV surfaces on Sandybridge,
where the send instruction does not have an implied move.
Signed-off-by: Xiang, Haihao <haihao.xiang@intel.com>
---
src/render_program/exa_wm_src_sample_argb.g4a | 3 ++-
src/render_program/exa_wm_src_sample_argb.g4b | 3 ++-
src/render_program/exa_wm_src_sample_argb.g4b.gen5 | 3 ++-
src/render_program/exa_wm_src_sample_planar.g4a | 7 ++++---
src/render_program/exa_wm_src_sample_planar.g4b | 7 ++++---
.../exa_wm_src_sample_planar.g4b.gen5 | 7 ++++---
6 files changed, 18 insertions(+), 12 deletions(-)
diff --git a/src/render_program/exa_wm_src_sample_argb.g4a b/src/render_program/exa_wm_src_sample_argb.g4a
index c20f53f..384fe26 100644
--- a/src/render_program/exa_wm_src_sample_argb.g4a
+++ b/src/render_program/exa_wm_src_sample_argb.g4a
@@ -36,12 +36,13 @@ include(`exa_wm.g4i')
/* load argb */
mov (1) g0.8<1>UD 0x00000000UD { align1 mask_disable };
+mov (8) src_msg<1>UD g0<8,8,1>UD { align1 }; /* copy to msg start reg*/
/* src_msg will be copied with g0, as it contains send desc */
/* emit sampler 'send' cmd */
send (16) src_msg_ind /* msg reg index */
src_sample_base<1>UW /* readback */
- g0<8,8,1>UW /* copy to msg start reg*/
+ null
sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype)
/* here(src->dst) we should use src_sampler and src_surface */
mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */
diff --git a/src/render_program/exa_wm_src_sample_argb.g4b b/src/render_program/exa_wm_src_sample_argb.g4b
index c5b9274..a15e40a 100644
--- a/src/render_program/exa_wm_src_sample_argb.g4b
+++ b/src/render_program/exa_wm_src_sample_argb.g4b
@@ -1,2 +1,3 @@
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
- { 0x01800031, 0x21c01d29, 0x008d0000, 0x02580001 },
+ { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
+ { 0x01800031, 0x21c01c09, 0x00000000, 0x02580001 },
diff --git a/src/render_program/exa_wm_src_sample_argb.g4b.gen5 b/src/render_program/exa_wm_src_sample_argb.g4b.gen5
index f8cb41e..42039af 100644
--- a/src/render_program/exa_wm_src_sample_argb.g4b.gen5
+++ b/src/render_program/exa_wm_src_sample_argb.g4b.gen5
@@ -1,2 +1,3 @@
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
- { 0x01800031, 0x21c01d29, 0x208d0000, 0x0a8a0001 },
+ { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
+ { 0x01800031, 0x21c01c09, 0x20000000, 0x0a8a0001 },
diff --git a/src/render_program/exa_wm_src_sample_planar.g4a b/src/render_program/exa_wm_src_sample_planar.g4a
index ad33350..5f5520b 100644
--- a/src/render_program/exa_wm_src_sample_planar.g4a
+++ b/src/render_program/exa_wm_src_sample_planar.g4a
@@ -41,9 +41,10 @@ mov (1) g0.8<1>UD 0x0000e000UD { align1 mask_disable };
/* emit sampler 'send' cmd */
/* sample Y */
+mov (8) src_msg<1>UD g0<8,8,1>UD { align1 }; /* copy to msg start reg*/
send (16) src_msg_ind /* msg reg index */
src_sample_g<1>UW /* readback */
- g0<8,8,1>UW /* copy to msg start reg*/
+ null
sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype)
/* here(src->dst) we should use src_sampler and src_surface */
mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 8 */
@@ -51,7 +52,7 @@ send (16) src_msg_ind /* msg reg index */
/* sample U (Cr) */
send (16) src_msg_ind /* msg reg index */
src_sample_r<1>UW /* readback */
- g0<8,8,1>UW /* copy to msg start reg*/
+ null
sampler (3,0,F) /* sampler message description, (binding_table,sampler_index,datatype)
/* here(src->dst) we should use src_sampler and src_surface */
mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 8 */
@@ -59,7 +60,7 @@ send (16) src_msg_ind /* msg reg index */
/* sample V (Cb) */
send (16) src_msg_ind /* msg reg index */
src_sample_b<1>UW /* readback */
- g0<8,8,1>UW /* copy to msg start reg*/
+ null
sampler (5,0,F) /* sampler message description, (binding_table,sampler_index,datatype)
/* here(src->dst) we should use src_sampler and src_surface */
mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 8 */
diff --git a/src/render_program/exa_wm_src_sample_planar.g4b b/src/render_program/exa_wm_src_sample_planar.g4b
index 23e5e0d..c8dc47d 100644
--- a/src/render_program/exa_wm_src_sample_planar.g4b
+++ b/src/render_program/exa_wm_src_sample_planar.g4b
@@ -1,4 +1,5 @@
{ 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
- { 0x01800031, 0x22001d29, 0x008d0000, 0x02520001 },
- { 0x01800031, 0x21c01d29, 0x008d0000, 0x02520003 },
- { 0x01800031, 0x22401d29, 0x008d0000, 0x02520005 },
+ { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
+ { 0x01800031, 0x22001c09, 0x00000000, 0x02520001 },
+ { 0x01800031, 0x21c01c09, 0x00000000, 0x02520003 },
+ { 0x01800031, 0x22401c09, 0x00000000, 0x02520005 },
diff --git a/src/render_program/exa_wm_src_sample_planar.g4b.gen5 b/src/render_program/exa_wm_src_sample_planar.g4b.gen5
index 71068d9..ce3670b 100644
--- a/src/render_program/exa_wm_src_sample_planar.g4b.gen5
+++ b/src/render_program/exa_wm_src_sample_planar.g4b.gen5
@@ -1,4 +1,5 @@
{ 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
- { 0x01800031, 0x22001d29, 0x208d0000, 0x0a2a0001 },
- { 0x01800031, 0x21c01d29, 0x208d0000, 0x0a2a0003 },
- { 0x01800031, 0x22401d29, 0x208d0000, 0x0a2a0005 },
+ { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
+ { 0x01800031, 0x22001c09, 0x20000000, 0x0a2a0001 },
+ { 0x01800031, 0x21c01c09, 0x20000000, 0x0a2a0003 },
+ { 0x01800031, 0x22401c09, 0x20000000, 0x0a2a0005 },
--
1.7.0.4
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATH v2 3/6] Xv: fragments for xv on Sandybridge.
2010-10-27 2:17 [PATH v2 0/6] Xv on Sandybridge Xiang, Haihao
2010-10-27 2:17 ` [PATH v2 1/6] Xv: set the surface state base address Xiang, Haihao
2010-10-27 2:17 ` [PATH v2 2/6] Xv: Send instruction doesn't use implied move when sampling YUV surface Xiang, Haihao
@ 2010-10-27 2:17 ` Xiang, Haihao
2010-10-27 2:17 ` [PATH v2 4/6] Xv: setup pipeline for Xv " Xiang, Haihao
` (2 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: Xiang, Haihao @ 2010-10-27 2:17 UTC (permalink / raw)
To: intel-gfx
[-- Attachment #1: Type: text/plain, Size: 12359 bytes --]
intel-gen4asm needs to be updated to build these fragments.
Signed-off-by: Xiang, Haihao <haihao.xiang@intel.com>
---
configure.ac | 2 +-
src/render_program/Makefile.am | 27 +++++++-
src/render_program/exa_wm_src_affine.g6a | 47 +++++++++++++
src/render_program/exa_wm_src_affine.g6b | 4 +
src/render_program/exa_wm_src_sample_argb.g6a | 1 +
src/render_program/exa_wm_src_sample_argb.g6b | 3 +
src/render_program/exa_wm_src_sample_planar.g6a | 1 +
src/render_program/exa_wm_src_sample_planar.g6b | 5 ++
src/render_program/exa_wm_write.g6a | 79 +++++++++++++++++++++++
src/render_program/exa_wm_write.g6b | 19 ++++++
src/render_program/exa_wm_yuv_rgb.g6a | 1 +
src/render_program/exa_wm_yuv_rgb.g6b | 12 ++++
12 files changed, 197 insertions(+), 4 deletions(-)
create mode 100644 src/render_program/exa_wm_src_affine.g6a
create mode 100644 src/render_program/exa_wm_src_affine.g6b
create mode 120000 src/render_program/exa_wm_src_sample_argb.g6a
create mode 100644 src/render_program/exa_wm_src_sample_argb.g6b
create mode 120000 src/render_program/exa_wm_src_sample_planar.g6a
create mode 100644 src/render_program/exa_wm_src_sample_planar.g6b
create mode 100644 src/render_program/exa_wm_write.g6a
create mode 100644 src/render_program/exa_wm_write.g6b
create mode 120000 src/render_program/exa_wm_yuv_rgb.g6a
create mode 100644 src/render_program/exa_wm_yuv_rgb.g6b
diff --git a/configure.ac b/configure.ac
index 58fa929..0553df3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -44,7 +44,7 @@ XORG_DEFAULT_OPTIONS
AC_DISABLE_STATIC
AC_PROG_LIBTOOL
-PKG_CHECK_MODULES(GEN4ASM, [intel-gen4asm >= 1.0], [gen4asm=yes], [gen4asm=no])
+PKG_CHECK_MODULES(GEN4ASM, [intel-gen4asm >= 1.1], [gen4asm=yes], [gen4asm=no])
AM_CONDITIONAL(HAVE_GEN4ASM, test x$gen4asm = xyes)
PKG_CHECK_MODULES(UDEV, [libudev], [udev=yes], [udev=no])
diff --git a/src/render_program/Makefile.am b/src/render_program/Makefile.am
index c9a0bfc..5229ef5 100644
--- a/src/render_program/Makefile.am
+++ b/src/render_program/Makefile.am
@@ -61,22 +61,43 @@ INTEL_G4B_GEN5 = \
exa_wm_yuv_rgb.g4b.gen5 \
exa_wm_xy.g4b.gen5
+INTEL_G6A = \
+ exa_wm_src_affine.g6a \
+ exa_wm_src_sample_argb.g6a \
+ exa_wm_src_sample_planar.g6a \
+ exa_wm_write.g6a \
+ exa_wm_yuv_rgb.g6a
+
+INTEL_G6B = \
+ exa_wm_src_affine.g6b \
+ exa_wm_src_sample_argb.g6b \
+ exa_wm_src_sample_planar.g6b \
+ exa_wm_write.g6b \
+ exa_wm_yuv_rgb.g6b
+
EXTRA_DIST = \
$(INTEL_G4A) \
$(INTEL_G4I) \
$(INTEL_G4B) \
- $(INTEL_G4B_GEN5)
+ $(INTEL_G4B_GEN5)\
+ $(INTEL_G6A) \
+ $(INTEL_G6B)
if HAVE_GEN4ASM
-SUFFIXES = .g4a .g4b
+SUFFIXES = .g4a .g4b .g6a .g6b
.g4a.g4b:
m4 -I$(srcdir) -s $< > $*.g4m && intel-gen4asm -o $@ $*.g4m && intel-gen4asm -g 5 -o $@.gen5 $*.g4m && rm $*.g4m
+.g6a.g6b:
+ m4 -I$(srcdir) -s $< > $*.g6m && intel-gen4asm -g 6 -o $@ $*.g6m && rm $*.g6m
+
$(INTEL_G4B): $(INTEL_G4I)
+$(INTEL_G6B): $(INTEL_G4I)
-BUILT_SOURCES= $(INTEL_G4B)
+BUILT_SOURCES= $(INTEL_G4B) $(INTEL_G6B)
clean-local:
-rm -f $(INTEL_G4B) $(INTEL_G4B_GEN5)
+ -rm -f $(INTEL_G6B)
endif
diff --git a/src/render_program/exa_wm_src_affine.g6a b/src/render_program/exa_wm_src_affine.g6a
new file mode 100644
index 0000000..08195a4
--- /dev/null
+++ b/src/render_program/exa_wm_src_affine.g6a
@@ -0,0 +1,47 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Fragment to compute src u/v values
+ */
+include(`exa_wm.g4i')
+
+define(`ul', `src_u')
+define(`uh', `m3')
+define(`vl', `src_v')
+define(`vh', `m5')
+
+define(`bl', `g2.0<8,8,1>F')
+define(`bh', `g4.0<8,8,1>F')
+
+define(`a0_a_x',`g6.0<0,1,0>F')
+define(`a0_a_y',`g6.16<0,1,0>F')
+
+/* U */
+pln (8) ul<1>F a0_a_x bl { align1 }; /* pixel 0-7 */
+pln (8) uh<1>F a0_a_x bh { align1 }; /* pixel 8-15 */
+
+/* V */
+pln (8) vl<1>F a0_a_y bl { align1 }; /* pixel 0-7 */
+pln (8) vh<1>F a0_a_y bh { align1 }; /* pixel 8-15 */
diff --git a/src/render_program/exa_wm_src_affine.g6b b/src/render_program/exa_wm_src_affine.g6b
new file mode 100644
index 0000000..7035e6a
--- /dev/null
+++ b/src/render_program/exa_wm_src_affine.g6b
@@ -0,0 +1,4 @@
+ { 0x0060005a, 0x204077be, 0x000000c0, 0x008d0040 },
+ { 0x0060005a, 0x206077be, 0x000000c0, 0x008d0080 },
+ { 0x0060005a, 0x208077be, 0x000000d0, 0x008d0040 },
+ { 0x0060005a, 0x20a077be, 0x000000d0, 0x008d0080 },
diff --git a/src/render_program/exa_wm_src_sample_argb.g6a b/src/render_program/exa_wm_src_sample_argb.g6a
new file mode 120000
index 0000000..2684089
--- /dev/null
+++ b/src/render_program/exa_wm_src_sample_argb.g6a
@@ -0,0 +1 @@
+exa_wm_src_sample_argb.g4a
\ No newline at end of file
diff --git a/src/render_program/exa_wm_src_sample_argb.g6b b/src/render_program/exa_wm_src_sample_argb.g6b
new file mode 100644
index 0000000..8bfe849
--- /dev/null
+++ b/src/render_program/exa_wm_src_sample_argb.g6b
@@ -0,0 +1,3 @@
+ { 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
+ { 0x02800031, 0x21c01cc9, 0x00000020, 0x0a8a0001 },
diff --git a/src/render_program/exa_wm_src_sample_planar.g6a b/src/render_program/exa_wm_src_sample_planar.g6a
new file mode 120000
index 0000000..d4e34a1
--- /dev/null
+++ b/src/render_program/exa_wm_src_sample_planar.g6a
@@ -0,0 +1 @@
+exa_wm_src_sample_planar.g4a
\ No newline at end of file
diff --git a/src/render_program/exa_wm_src_sample_planar.g6b b/src/render_program/exa_wm_src_sample_planar.g6b
new file mode 100644
index 0000000..0a22827
--- /dev/null
+++ b/src/render_program/exa_wm_src_sample_planar.g6b
@@ -0,0 +1,5 @@
+ { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
+ { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
+ { 0x02800031, 0x22001cc9, 0x00000020, 0x0a2a0001 },
+ { 0x02800031, 0x21c01cc9, 0x00000020, 0x0a2a0003 },
+ { 0x02800031, 0x22401cc9, 0x00000020, 0x0a2a0005 },
diff --git a/src/render_program/exa_wm_write.g6a b/src/render_program/exa_wm_write.g6a
new file mode 100644
index 0000000..27f91b5
--- /dev/null
+++ b/src/render_program/exa_wm_write.g6a
@@ -0,0 +1,79 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+include(`exa_wm.g4i')
+
+/*
+ * Prepare data in m2-m3 for Red channel, m4-m5 for Green channel,
+ * m6-m7 for Blue and m8-m9 for Alpha channel
+ */
+define(`slot_r_00', `m2')
+define(`slot_r_01', `m3')
+define(`slot_g_00', `m4')
+define(`slot_g_01', `m5')
+define(`slot_b_00', `m6')
+define(`slot_b_01', `m7')
+define(`slot_a_00', `m8')
+define(`slot_a_01', `m9')
+
+mov (8) slot_r_00<1>F src_sample_r_01<8,8,1>F { align1 };
+mov (8) slot_r_01<1>F src_sample_r_23<8,8,1>F { align1 };
+
+mov (8) slot_g_00<1>F src_sample_g_01<8,8,1>F { align1 };
+mov (8) slot_g_01<1>F src_sample_g_23<8,8,1>F { align1 };
+
+mov (8) slot_b_00<1>F src_sample_b_01<8,8,1>F { align1 };
+mov (8) slot_b_01<1>F src_sample_b_23<8,8,1>F { align1 };
+
+mov (8) slot_a_00<1>F src_sample_a_01<8,8,1>F { align1 };
+mov (8) slot_a_01<1>F src_sample_a_23<8,8,1>F { align1 };
+
+/* pass payload in m0-m1 */
+mov (8) data_port_msg_0<1>UD g0<8,8,1>UD { align1 };
+mov (8) data_port_msg_1<1>UD g1<8,8,1>UD { align1 };
+
+/* write */
+send (16)
+ data_port_msg_0_ind
+ acc0<1>UW
+ null
+ write (
+ 0, /* binding_table */
+ 16, /* pixel scoreboard clear, msg type simd16 single source */
+ 12, /* render target write */
+ 0 /* no write commit message */
+ )
+ mlen 10
+ rlen 0
+ { align1 EOT };
+
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+
diff --git a/src/render_program/exa_wm_write.g6b b/src/render_program/exa_wm_write.g6b
new file mode 100644
index 0000000..9db2129
--- /dev/null
+++ b/src/render_program/exa_wm_write.g6b
@@ -0,0 +1,19 @@
+ { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d0200, 0x00000000 },
+ { 0x00600001, 0x20a003be, 0x008d0220, 0x00000000 },
+ { 0x00600001, 0x20c003be, 0x008d0240, 0x00000000 },
+ { 0x00600001, 0x20e003be, 0x008d0260, 0x00000000 },
+ { 0x00600001, 0x210003be, 0x008d0280, 0x00000000 },
+ { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
+ { 0x00600001, 0x20000022, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x20200022, 0x008d0020, 0x00000000 },
+ { 0x05800031, 0x24001cc8, 0x00000000, 0x94099000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/render_program/exa_wm_yuv_rgb.g6a b/src/render_program/exa_wm_yuv_rgb.g6a
new file mode 120000
index 0000000..d34d246
--- /dev/null
+++ b/src/render_program/exa_wm_yuv_rgb.g6a
@@ -0,0 +1 @@
+exa_wm_yuv_rgb.g4a
\ No newline at end of file
diff --git a/src/render_program/exa_wm_yuv_rgb.g6b b/src/render_program/exa_wm_yuv_rgb.g6b
new file mode 100644
index 0000000..01ec5e5
--- /dev/null
+++ b/src/render_program/exa_wm_yuv_rgb.g6b
@@ -0,0 +1,12 @@
+ { 0x00800040, 0x23007fbd, 0x008d0200, 0xbd808081 },
+ { 0x00800041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 },
+ { 0x00800040, 0x22c07fbd, 0x008d01c0, 0xbf008084 },
+ { 0x00800040, 0x23407fbd, 0x008d0240, 0xbf008084 },
+ { 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 },
+ { 0x80800048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba },
+ { 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 },
+ { 0x00800048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 },
+ { 0x80800048, 0x22007fbd, 0x008d0340, 0xbec8b439 },
+ { 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 },
+ { 0x80800048, 0x22407fbd, 0x008d0340, 0x40011687 },
+ { 0x00800001, 0x228003fd, 0x00000000, 0x3f800000 },
--
1.7.0.4
[-- Attachment #2: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATH v2 4/6] Xv: setup pipeline for Xv on Sandybridge
2010-10-27 2:17 [PATH v2 0/6] Xv on Sandybridge Xiang, Haihao
` (2 preceding siblings ...)
2010-10-27 2:17 ` [PATH v2 3/6] Xv: fragments for xv on Sandybridge Xiang, Haihao
@ 2010-10-27 2:17 ` Xiang, Haihao
2010-10-27 2:17 ` [PATH v2 5/6] Xv: enable TextureAdaptor for Sandybridge Xiang, Haihao
2010-10-27 2:17 ` [PATH v2 6/6] Xv: don't call intel_wait_for_scanline on Sandybridge Xiang, Haihao
5 siblings, 0 replies; 9+ messages in thread
From: Xiang, Haihao @ 2010-10-27 2:17 UTC (permalink / raw)
To: intel-gfx
Signed-off-by: Xiang, Haihao <haihao.xiang@intel.com>
---
src/brw_structs.h | 100 ++++++++
src/i965_reg.h | 98 ++++++++
src/i965_video.c | 627 +++++++++++++++++++++++++++++++++++++++++++++++
src/intel.h | 4 +
src/intel_batchbuffer.c | 25 ++-
src/intel_video.h | 7 +
6 files changed, 855 insertions(+), 6 deletions(-)
diff --git a/src/brw_structs.h b/src/brw_structs.h
index 1cee5bd..d089ba1 100644
--- a/src/brw_structs.h
+++ b/src/brw_structs.h
@@ -1487,4 +1487,104 @@ struct brw_interface_descriptor {
} desc3;
};
+/*
+ * GEN6 blend state, described as two bit-field words (blend0, blend1).
+ * The fields of each word add up to exactly 32 bits.
+ * NOTE(review): the mapping of fields to hardware bit positions relies on
+ * the compiler's bit-field allocation order - confirm against the PRM.
+ */
+struct gen6_blend_state
+{
+ /* blend factors/functions for colour and independent-alpha ("ia_") */
+ struct {
+ unsigned int dest_blend_factor:5;
+ unsigned int source_blend_factor:5;
+ unsigned int pad3:1;
+ unsigned int blend_func:3;
+ unsigned int pad2:1;
+ unsigned int ia_dest_blend_factor:5;
+ unsigned int ia_source_blend_factor:5;
+ unsigned int pad1:1;
+ unsigned int ia_blend_func:3;
+ unsigned int pad0:1;
+ unsigned int ia_blend_enable:1;
+ unsigned int blend_enable:1;
+ } blend0;
+
+ /* clamp, dither, alpha test, logic op and write-disable controls */
+ struct {
+ unsigned int post_blend_clamp_enable:1;
+ unsigned int pre_blend_clamp_enable:1;
+ unsigned int clamp_range:2;
+ unsigned int pad0:4;
+ unsigned int x_dither_offset:2;
+ unsigned int y_dither_offset:2;
+ unsigned int dither_enable:1;
+ unsigned int alpha_test_func:3;
+ unsigned int alpha_test_enable:1;
+ unsigned int pad1:1;
+ unsigned int logic_op_func:4;
+ unsigned int logic_op_enable:1;
+ unsigned int pad2:1;
+ unsigned int write_disable_b:1;
+ unsigned int write_disable_g:1;
+ unsigned int write_disable_r:1;
+ unsigned int write_disable_a:1;
+ unsigned int pad3:1;
+ unsigned int alpha_to_coverage_dither:1;
+ unsigned int alpha_to_one:1;
+ unsigned int alpha_to_coverage:1;
+ } blend1;
+};
+
+/*
+ * GEN6 colour calculator state: one 32-bit bit-field word (cc0), an alpha
+ * reference stored either as a float or as an 8-bit integer (cc1), and
+ * four float constant colour components.
+ */
+struct gen6_color_calc_state
+{
+ struct {
+ unsigned int alpha_test_format:1;
+ unsigned int pad0:14;
+ unsigned int round_disable:1;
+ unsigned int bf_stencil_ref:8;
+ unsigned int stencil_ref:8;
+ } cc0;
+
+ /* alpha reference; presumably interpreted per alpha_test_format - TODO confirm */
+ union {
+ float alpha_ref_f;
+ struct {
+ unsigned int ui:8;
+ unsigned int pad0:24;
+ } alpha_ref_fi;
+ } cc1;
+
+ float constant_r;
+ float constant_g;
+ float constant_b;
+ float constant_a;
+};
+
+/*
+ * GEN6 depth/stencil state, described as three bit-field words whose
+ * fields each add up to 32 bits: stencil ops/functions (ds0), stencil
+ * masks (ds1) and depth test/write controls (ds2). Fields prefixed
+ * "bf_" are a second set of stencil controls (presumably back-face -
+ * TODO confirm against the PRM).
+ */
+struct gen6_depth_stencil_state
+{
+ struct {
+ unsigned int pad0:3;
+ unsigned int bf_stencil_pass_depth_pass_op:3;
+ unsigned int bf_stencil_pass_depth_fail_op:3;
+ unsigned int bf_stencil_fail_op:3;
+ unsigned int bf_stencil_func:3;
+ unsigned int bf_stencil_enable:1;
+ unsigned int pad1:2;
+ unsigned int stencil_write_enable:1;
+ unsigned int stencil_pass_depth_pass_op:3;
+ unsigned int stencil_pass_depth_fail_op:3;
+ unsigned int stencil_fail_op:3;
+ unsigned int stencil_func:3;
+ unsigned int stencil_enable:1;
+ } ds0;
+
+ struct {
+ unsigned int bf_stencil_write_mask:8;
+ unsigned int bf_stencil_test_mask:8;
+ unsigned int stencil_write_mask:8;
+ unsigned int stencil_test_mask:8;
+ } ds1;
+
+ struct {
+ unsigned int pad0:26;
+ unsigned int depth_write_enable:1;
+ unsigned int depth_test_func:3;
+ unsigned int pad1:1;
+ unsigned int depth_test_enable:1;
+ } ds2;
+};
+
#endif
diff --git a/src/i965_reg.h b/src/i965_reg.h
index fe419dc..3953dab 100644
--- a/src/i965_reg.h
+++ b/src/i965_reg.h
@@ -22,6 +22,10 @@
#define BRW_3DSTATE_PIPELINED_POINTERS BRW_3D(3, 0, 0)
#define BRW_3DSTATE_BINDING_TABLE_POINTERS BRW_3D(3, 0, 1)
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */
+
#define BRW_3DSTATE_VERTEX_BUFFERS BRW_3D(3, 0, 8)
#define BRW_3DSTATE_VERTEX_ELEMENTS BRW_3D(3, 0, 9)
#define BRW_3DSTATE_INDEX_BUFFER BRW_3D(3, 0, 0xa)
@@ -32,6 +36,9 @@
#define BRW_3DSTATE_SAMPLER_PALETTE_LOAD BRW_3D(3, 1, 2)
#define BRW_3DSTATE_CHROMA_KEY BRW_3D(3, 1, 4)
#define BRW_3DSTATE_DEPTH_BUFFER BRW_3D(3, 1, 5)
+# define BRW_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
+# define BRW_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
+
#define BRW_3DSTATE_POLY_STIPPLE_OFFSET BRW_3D(3, 1, 6)
#define BRW_3DSTATE_POLY_STIPPLE_PATTERN BRW_3D(3, 1, 7)
#define BRW_3DSTATE_LINE_STIPPLE BRW_3D(3, 1, 8)
@@ -44,6 +51,91 @@
#define BRW_3DPRIMITIVE BRW_3D(3, 3, 0)
+#define BRW_3DSTATE_CLEAR_PARAMS BRW_3D(3, 1, 0x10)
+/* DW1 */
+# define BRW_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
+
+/* for GEN6+ */
+#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS BRW_3D(3, 0, 0x02)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8)
+
+#define GEN6_3DSTATE_URB BRW_3D(3, 0, 0x05)
+/* DW1 */
+# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16
+# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0
+/* DW2 */
+# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8
+# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0
+
+#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS BRW_3D(3, 0, 0x0d)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10)
+
+#define GEN6_3DSTATE_CC_STATE_POINTERS BRW_3D(3, 0, 0x0e)
+
+#define GEN6_3DSTATE_VS BRW_3D(3, 0, 0x10)
+
+#define GEN6_3DSTATE_GS BRW_3D(3, 0, 0x11)
+/* DW4 */
+# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0
+
+#define GEN6_3DSTATE_CLIP BRW_3D(3, 0, 0x12)
+
+#define GEN6_3DSTATE_SF BRW_3D(3, 0, 0x13)
+/* DW1 */
+# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW2 */
+/* DW3 */
+# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29)
+# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29)
+# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29)
+# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29)
+/* DW4 */
+# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29
+# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27
+# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25
+
+
+#define GEN6_3DSTATE_WM BRW_3D(3, 0, 0x14)
+/* DW2 */
+# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF 27
+# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+/* DW4 */
+# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16
+/* DW5 */
+# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25
+# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19)
+# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1)
+# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0)
+/* DW6 */
+# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
+
+
+#define GEN6_3DSTATE_CONSTANT_VS BRW_3D(3, 0, 0x15)
+#define GEN6_3DSTATE_CONSTANT_GS BRW_3D(3, 0, 0x16)
+#define GEN6_3DSTATE_CONSTANT_PS BRW_3D(3, 0, 0x17)
+
+#define GEN6_3DSTATE_SAMPLE_MASK BRW_3D(3, 0, 0x18)
+
+#define GEN6_3DSTATE_MULTISAMPLE BRW_3D(3, 1, 0x0d)
+/* DW1 */
+# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4)
+# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1)
+
#define PIPELINE_SELECT_3D 0
#define PIPELINE_SELECT_MEDIA 1
@@ -80,16 +172,22 @@
#define BRW_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
#define BRW_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
#define BRW_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
+#define BRW_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
/* VERTEX_BUFFER_STATE Structure */
#define VB0_BUFFER_INDEX_SHIFT 27
+#define GEN6_VB0_BUFFER_INDEX_SHIFT 26
#define VB0_VERTEXDATA (0 << 26)
#define VB0_INSTANCEDATA (1 << 26)
+#define GEN6_VB0_VERTEXDATA (0 << 20)
+#define GEN6_VB0_INSTANCEDATA (1 << 20)
#define VB0_BUFFER_PITCH_SHIFT 0
/* VERTEX_ELEMENT_STATE Structure */
#define VE0_VERTEX_BUFFER_INDEX_SHIFT 27
+#define GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN6 */
#define VE0_VALID (1 << 26)
+#define GEN6_VE0_VALID (1 << 25) /* for GEN6 */
#define VE0_FORMAT_SHIFT 16
#define VE0_OFFSET_SHIFT 0
#define VE1_VFCOMPONENT_0_SHIFT 28
diff --git a/src/i965_video.c b/src/i965_video.c
index aaf10fa..235dfb9 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -134,6 +134,21 @@ static const uint32_t ps_kernel_planar_static_gen5[][4] = {
#include "exa_wm_write.g4b.gen5"
};
+/* programs for Sandybridge */
+/*
+ * Pixel shader program used for packed sources, assembled by including
+ * four pre-built .g6b fragments in order. Each row is four 32-bit words.
+ * Judging by the file names the stages are: affine source coordinates,
+ * ARGB sample, YUV->RGB conversion, write-out - TODO confirm.
+ */
+static const uint32_t ps_kernel_packed_static_gen6[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_yuv_rgb.g6b"
+#include "exa_wm_write.g6b"
+};
+
+/*
+ * Pixel shader program used for planar sources. Identical to the packed
+ * program except that the sampling fragment is the "planar" one.
+ */
+static const uint32_t ps_kernel_planar_static_gen6[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_planar.g6b"
+#include "exa_wm_yuv_rgb.g6b"
+#include "exa_wm_write.g6b"
+};
+
static uint32_t float_to_uint(float f)
{
union {
@@ -1256,4 +1271,616 @@ void i965_free_video(ScrnInfoPtr scrn)
intel->video.gen4_sampler_bo = NULL;
drm_intel_bo_unreference(intel->video.gen4_sip_kernel_bo);
intel->video.gen4_sip_kernel_bo = NULL;
+ drm_intel_bo_unreference(intel->video.wm_prog_packed_bo);
+ intel->video.wm_prog_packed_bo = NULL;
+ drm_intel_bo_unreference(intel->video.wm_prog_planar_bo);
+ intel->video.wm_prog_planar_bo = NULL;
+ drm_intel_bo_unreference(intel->video.gen6_blend_bo);
+ intel->video.gen6_blend_bo = NULL;
+ drm_intel_bo_unreference(intel->video.gen6_depth_stencil_bo);
+ intel->video.gen6_depth_stencil_bo = NULL;
+}
+
+/* for GEN6+ */
+/*
+ * Allocate a buffer object holding a gen6_color_calc_state whose constant
+ * colour is (r=1, g=0, b=1, a=1); all other fields are left as provided
+ * by intel_alloc_and_map().
+ *
+ * Returns the unmapped buffer object, or NULL if allocation/mapping fails.
+ */
+static drm_intel_bo *
+gen6_create_cc_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	struct gen6_color_calc_state *state;
+	drm_intel_bo *bo;
+	int ret;
+
+	ret = intel_alloc_and_map(intel, "textured video cc state",
+				  sizeof(*state), &bo, &state);
+	if (ret != 0)
+		return NULL;
+
+	state->constant_r = 1.0;
+	state->constant_g = 0.0;
+	state->constant_b = 1.0;
+	state->constant_a = 1.0;
+
+	/* the contents are final; drop the CPU mapping before handing it out */
+	drm_intel_bo_unmap(bo);
+	return bo;
+}
+
+/*
+ * Allocate a buffer object holding a gen6_blend_state with the logic op
+ * enabled and its function set to 0xc; blending itself is not enabled.
+ *
+ * Returns the unmapped buffer object, or NULL if allocation/mapping fails.
+ */
+static drm_intel_bo *
+gen6_create_blend_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	struct gen6_blend_state *state;
+	drm_intel_bo *bo;
+	int ret;
+
+	ret = intel_alloc_and_map(intel, "textured video blend state",
+				  sizeof(*state), &bo, &state);
+	if (ret != 0)
+		return NULL;
+
+	/* NOTE(review): 0xc is presumably the "copy source" logic op - confirm */
+	state->blend1.logic_op_enable = 1;
+	state->blend1.logic_op_func = 0xc;
+
+	drm_intel_bo_unmap(bo);
+	return bo;
+}
+
+/*
+ * Allocate a buffer object holding a gen6_depth_stencil_state. No field
+ * is set here, so the state is whatever intel_alloc_and_map() provides
+ * (no depth or stencil test is explicitly enabled).
+ *
+ * Returns the unmapped buffer object, or NULL if allocation/mapping fails.
+ */
+static drm_intel_bo *
+gen6_create_depth_stencil_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	struct gen6_depth_stencil_state *depth_stencil_state;
+	drm_intel_bo *depth_stencil_bo;
+
+	/*
+	 * The buffer name was previously copy-pasted from the blend state
+	 * helper; give it its own label so it can be told apart when
+	 * buffers are listed by name.
+	 */
+	if (intel_alloc_and_map(intel,
+				"textured video depth stencil state",
+				sizeof(*depth_stencil_state),
+				&depth_stencil_bo,
+				&depth_stencil_state) != 0)
+		return NULL;
+
+	drm_intel_bo_unmap(depth_stencil_bo);
+	return depth_stencil_bo;
+}
+
+/*
+ * Lazily create every long-lived buffer object the GEN6 textured video
+ * path needs (sampler state, both pixel shader programs, CC viewport,
+ * colour calc, blend and depth/stencil state). Objects that already exist
+ * in intel->video are reused.
+ *
+ * Returns TRUE only when all seven objects are available afterwards.
+ */
+static Bool
+gen6_create_vidoe_objects(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	if (!intel->video.gen4_sampler_bo)
+		intel->video.gen4_sampler_bo = i965_create_sampler_state(scrn);
+
+	if (!intel->video.wm_prog_packed_bo)
+		intel->video.wm_prog_packed_bo =
+			i965_create_program(scrn,
+					    &ps_kernel_packed_static_gen6[0][0],
+					    sizeof(ps_kernel_packed_static_gen6));
+
+	if (!intel->video.wm_prog_planar_bo)
+		intel->video.wm_prog_planar_bo =
+			i965_create_program(scrn,
+					    &ps_kernel_planar_static_gen6[0][0],
+					    sizeof(ps_kernel_planar_static_gen6));
+
+	if (!intel->video.gen4_cc_vp_bo)
+		intel->video.gen4_cc_vp_bo = i965_create_cc_vp_state(scrn);
+
+	if (!intel->video.gen4_cc_bo)
+		intel->video.gen4_cc_bo = gen6_create_cc_state(scrn);
+
+	if (!intel->video.gen6_blend_bo)
+		intel->video.gen6_blend_bo = gen6_create_blend_state(scrn);
+
+	if (!intel->video.gen6_depth_stencil_bo)
+		intel->video.gen6_depth_stencil_bo =
+			gen6_create_depth_stencil_state(scrn);
+
+	/* any missing object means the pipeline cannot be set up */
+	if (!intel->video.gen4_sampler_bo)
+		return FALSE;
+	if (!intel->video.wm_prog_packed_bo)
+		return FALSE;
+	if (!intel->video.wm_prog_planar_bo)
+		return FALSE;
+	if (!intel->video.gen4_cc_vp_bo)
+		return FALSE;
+	if (!intel->video.gen4_cc_bo)
+		return FALSE;
+	if (!intel->video.gen6_blend_bo)
+		return FALSE;
+	if (!intel->video.gen6_depth_stencil_bo)
+		return FALSE;
+
+	return TRUE;
+}
+
+/*
+ * Emit the fixed preamble of the video pipeline setup into the batch:
+ * a PIPE_CONTROL flush, 3D pipeline select, single-sample MULTISAMPLE
+ * state, a sample mask of 1 and a zero system instruction pointer.
+ * Emits 12 dwords in total.
+ */
+static void
+gen6_upload_invarient_states(ScrnInfoPtr scrn)
+{
+ /* "intel" is used implicitly by the OUT_BATCH macros */
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+
+ OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2));
+ OUT_BATCH(BRW_PIPE_CONTROL_IS_FLUSH |
+ BRW_PIPE_CONTROL_WC_FLUSH |
+ BRW_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ BRW_PIPE_CONTROL_NOWRITE);
+ OUT_BATCH(0); /* write address */
+ OUT_BATCH(0); /* write data */
+
+ OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+
+ OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
+ OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
+ GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
+ OUT_BATCH(0);
+
+ OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
+ OUT_BATCH(1);
+
+ /* Set system instruction pointer */
+ OUT_BATCH(BRW_STATE_SIP | 0);
+ OUT_BATCH(0);
+}
+
+/*
+ * Emit the 10-dword STATE_BASE_ADDRESS command. Only the surface state
+ * base is given a real address (a relocation to
+ * surface_state_binding_table_bo); every other base and upper bound is
+ * written as a zero address with the modify bit set.
+ */
+static void
+gen6_upload_state_base_address(ScrnInfoPtr scrn, drm_intel_bo *surface_state_binding_table_bo)
+{
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+
+ OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2));
+ OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state base address */
+ OUT_RELOC(surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
+ OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state base address */
+ OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object base address */
+ OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction base address */
+ OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state upper bound */
+ OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
+ OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
+ OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
+}
+
+/*
+ * Emit 3DSTATE_VIEWPORT_STATE_POINTERS with only the CC modify flag set.
+ * The first two pointer dwords are zero and the third is a relocation to
+ * intel->video.gen4_cc_vp_bo.
+ * NOTE(review): the two zero dwords are presumably the CLIP and SF
+ * viewport pointers (their modify flags are not set) - confirm.
+ */
+static void
+gen6_upload_viewport_state_pointers(ScrnInfoPtr scrn)
+{
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+
+ OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
+ GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
+ (4 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_RELOC(intel->video.gen4_cc_vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+}
+
+/*
+ * Emit 3DSTATE_URB: the VS size field is written as (1 - 1) with 24 VS
+ * entries, and both GS fields are zero.
+ */
+static void
+gen6_upload_urb(ScrnInfoPtr scrn)
+{
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+
+ OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
+ OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
+ (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
+ OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
+ (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
+}
+
+/*
+ * Emit 3DSTATE_CC_STATE_POINTERS with relocations to the blend,
+ * depth/stencil and colour calc state buffers, in that order.
+ * NOTE(review): each relocation uses a delta of 1, which presumably sets
+ * a per-pointer "modify enable" bit in the low bit - confirm.
+ */
+static void
+gen6_upload_cc_state_pointers(ScrnInfoPtr scrn)
+{
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+
+ OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
+ OUT_RELOC(intel->video.gen6_blend_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+ OUT_RELOC(intel->video.gen6_depth_stencil_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+ OUT_RELOC(intel->video.gen4_cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+}
+
+/*
+ * Emit 3DSTATE_SAMPLER_STATE_POINTERS with only the PS modify flag set:
+ * the VS and GS pointers are zero and the last dword is a relocation to
+ * intel->video.gen4_sampler_bo.
+ */
+static void
+gen6_upload_sampler_state_pointers(ScrnInfoPtr scrn)
+{
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+
+ OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
+ GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
+ (4 - 2));
+ OUT_BATCH(0); /* VS */
+ OUT_BATCH(0); /* GS */
+ OUT_RELOC(intel->video.gen4_sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+}
+
+/*
+ * Emit 3DSTATE_BINDING_TABLE_POINTERS with only the PS modify flag set.
+ *
+ * ps_binding_table_offset is written verbatim (no relocation).
+ * NOTE(review): it is presumably interpreted relative to the surface
+ * state base address programmed by gen6_upload_state_base_address() -
+ * confirm.
+ */
+static void
+gen6_upload_binding_table(ScrnInfoPtr scrn, uint32_t ps_binding_table_offset)
+{
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+
+ /* Binding table pointers */
+ OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS |
+ GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
+ (4 - 2));
+ OUT_BATCH(0); /* vs */
+ OUT_BATCH(0); /* gs */
+ /* Only the PS uses the binding table */
+ OUT_BATCH(ps_binding_table_offset);
+}
+
+/*
+ * Emit a 7-dword 3DSTATE_DEPTH_BUFFER describing a NULL surface (format
+ * field set to D32_FLOAT, all remaining dwords zero), followed by a
+ * 3DSTATE_CLEAR_PARAMS whose single payload dword is zero.
+ */
+static void
+gen6_upload_depth_buffer_state(ScrnInfoPtr scrn)
+{
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+
+ OUT_BATCH(BRW_3DSTATE_DEPTH_BUFFER | (7 - 2));
+ OUT_BATCH((BRW_SURFACE_NULL << BRW_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT) |
+ (BRW_DEPTHFORMAT_D32_FLOAT << BRW_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+
+ OUT_BATCH(BRW_3DSTATE_CLEAR_PARAMS | (2 - 2));
+ OUT_BATCH(0);
+}
+
+/*
+ * Emit 3DSTATE_DRAWING_RECTANGLE covering (0, 0) to
+ * (virtualX - 1, virtualY - 1) with a zero origin. x occupies the low
+ * 16 bits and y the high 16 bits of each coordinate dword.
+ */
+static void
+gen6_upload_drawing_rectangle(ScrnInfoPtr scrn)
+{
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+
+ /* length field: 4 dwords total, written here as the literal 2 */
+ OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2);
+ OUT_BATCH(0x00000000); /* ymin, xmin */
+ OUT_BATCH((scrn->virtualX - 1) | (scrn->virtualY - 1) << 16); /* ymax, xmax */
+ OUT_BATCH(0x00000000); /* yorigin, xorigin */
+}
+
+/*
+ * Emit an all-zero 3DSTATE_CONSTANT_VS (5 dwords) followed by an all-zero
+ * 3DSTATE_VS (6 dwords), i.e. no VS constants and no VS kernel.
+ */
+static void
+gen6_upload_vs_state(ScrnInfoPtr scrn)
+{
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+
+ /* disable VS constant buffer */
+ OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+
+ OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
+ OUT_BATCH(0); /* without VS kernel */
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0); /* pass-through */
+}
+
+/*
+ * Emit an all-zero 3DSTATE_CONSTANT_GS (5 dwords) followed by an all-zero
+ * 3DSTATE_GS (7 dwords), i.e. no GS constants and no GS kernel.
+ */
+static void
+gen6_upload_gs_state(ScrnInfoPtr scrn)
+{
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+
+ /* disable GS constant buffer */
+ OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+
+ OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
+ OUT_BATCH(0); /* without GS kernel */
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0); /* pass-through */
+}
+
+/* Emit a 4-dword 3DSTATE_CLIP whose payload is all zeros. */
+static void
+gen6_upload_clip_state(ScrnInfoPtr scrn)
+{
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+
+ OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0); /* pass-through */
+ OUT_BATCH(0);
+}
+
+/*
+ * Emit the 20-dword 3DSTATE_SF command: one output, URB entry read
+ * length 1 at read offset 0, culling set to CULL_NONE and a trifan
+ * provoking-vertex value of 2. All remaining dwords are zero.
+ */
+static void
+gen6_upload_sf_state(ScrnInfoPtr scrn)
+{
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+
+ OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
+ OUT_BATCH((1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
+ (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
+ (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
+ OUT_BATCH(0);
+ OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
+ OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0); /* DW9 */
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0); /* DW14 */
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0); /* DW19 */
+}
+
+/*
+ * Emit an all-zero 3DSTATE_CONSTANT_PS followed by the 9-dword
+ * 3DSTATE_WM command.
+ *
+ * is_packed selects the kernel: TRUE relocates to wm_prog_packed_bo with
+ * a binding table entry count of 2, FALSE relocates to wm_prog_planar_bo
+ * with a count of 7. These match the (n_src_surf + 1) surfaces set up by
+ * Gen6DisplayVideoTextured() for 1 and 6 source surfaces respectively.
+ */
+static void
+gen6_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed)
+{
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+
+ /* disable WM constant buffer */
+ OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+
+ OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
+ if (is_packed) {
+ OUT_RELOC(intel->video.wm_prog_packed_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0);
+ /* "SHITF" is how the macro is spelled in i965_reg.h in this patch */
+ OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
+ (2 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+ } else {
+ OUT_RELOC(intel->video.wm_prog_planar_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0);
+ OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
+ (7 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+ }
+ OUT_BATCH(0);
+ OUT_BATCH((6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
+ /* max-threads field is written as (40 - 1); 16-wide dispatch only */
+ OUT_BATCH(((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
+ GEN6_3DSTATE_WM_DISPATCH_ENABLE |
+ GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
+ OUT_BATCH((1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
+ GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+}
+
+/*
+ * Emit 3DSTATE_VERTEX_ELEMENTS describing two elements, both read from
+ * vertex buffer 0 as R32G32_FLOAT: one at byte offset 0 and one at byte
+ * offset 8. Each is expanded to four components by storing the two source
+ * values followed by two 1.0 constants.
+ *
+ * NOTE(review): Gen6DisplayVideoTextured() writes the scaled texture
+ * coordinates first and the pixmap coordinates second in each vertex;
+ * check the "X,Y" / "S0, T0" labels below against that layout.
+ */
+static void
+gen6_upload_vertex_element_state(ScrnInfoPtr scrn)
+{
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+
+ /* Set up our vertex elements, sourced from the single vertex buffer. */
+ OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | (5 - 2));
+ /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
+ OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+ GEN6_VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+ (0 << VE0_OFFSET_SHIFT));
+ OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+ /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
+ OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+ GEN6_VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+ (8 << VE0_OFFSET_SHIFT));
+ OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+}
+
+/*
+ * Emit the complete GEN6 3D pipeline setup for textured video into the
+ * current batch.
+ *
+ * surface_state_binding_table_bo: buffer holding the destination surface
+ *	state, the source surface states and, after them, the binding table.
+ * n_src_surf: number of source surfaces; must be 1 (packed kernel) or
+ *	6 (planar kernel).
+ *
+ * Marks the last 3D user as LAST_3D_VIDEO.
+ */
+static void
+gen6_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo *surface_state_binding_table_bo, int n_src_surf)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	/*
+	 * The binding table sits directly after the (n_src_surf + 1)
+	 * 32-byte-aligned surface states (one destination + the sources).
+	 */
+	uint32_t binding_table_offset =
+		(n_src_surf + 1) * ALIGN(sizeof(struct brw_surface_state), 32);
+
+	assert(n_src_surf == 1 || n_src_surf == 6);
+	IntelEmitInvarientState(scrn);
+	intel->last_3d = LAST_3D_VIDEO;
+
+	gen6_upload_invarient_states(scrn);
+	gen6_upload_state_base_address(scrn, surface_state_binding_table_bo);
+	gen6_upload_viewport_state_pointers(scrn);
+	gen6_upload_urb(scrn);
+	gen6_upload_cc_state_pointers(scrn);
+	gen6_upload_sampler_state_pointers(scrn);
+	gen6_upload_vs_state(scrn);
+	gen6_upload_gs_state(scrn);
+	gen6_upload_clip_state(scrn);
+	gen6_upload_sf_state(scrn);
+	gen6_upload_wm_state(scrn, n_src_surf == 1 ? TRUE : FALSE);
+	gen6_upload_binding_table(scrn, binding_table_offset);
+	gen6_upload_depth_buffer_state(scrn);
+	gen6_upload_drawing_rectangle(scrn);
+	gen6_upload_vertex_element_state(scrn);
+}
+
+/*
+ * Draw one video frame into a pixmap through the GEN6 3D pipeline.
+ *
+ * scrn: screen the video is shown on.
+ * adaptor_priv: supplies the source buffer (buf) and the Y/U/V offsets.
+ * id: FOURCC of the source image; selects planar vs. packed handling.
+ * dstRegion: destination region; one rectangle is drawn per box in it.
+ * width, height: source image dimensions.
+ * video_pitch, video_pitch2: source pitches; for planar formats
+ *	video_pitch2 is used for the Y surfaces and video_pitch for the
+ *	chroma surfaces, for packed formats only video_pitch is used.
+ * src_w, src_h, drw_w, drw_h: used to compute the texture coordinate
+ *	scale factors.
+ * pixmap: destination pixmap.
+ *
+ * Returns silently, without drawing, if the surface state buffer or the
+ * shared video objects cannot be created. Stops drawing further boxes if
+ * a vertex buffer cannot be allocated.
+ */
+void Gen6DisplayVideoTextured(ScrnInfoPtr scrn,
+ intel_adaptor_private *adaptor_priv, int id,
+ RegionPtr dstRegion,
+ short width, short height,
+ int video_pitch, int video_pitch2,
+ short src_w, short src_h,
+ short drw_w, short drw_h, PixmapPtr pixmap)
+{
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+ BoxPtr pbox;
+ int nbox, dxo, dyo, pix_xoff, pix_yoff;
+ float src_scale_x, src_scale_y;
+ int src_surf;
+ int n_src_surf;
+ uint32_t src_surf_format;
+ uint32_t src_surf_base[6];
+ int src_width[6];
+ int src_height[6];
+ int src_pitch[6];
+ drm_intel_bo *surface_state_binding_table_bo;
+
+ /* each plane offset is listed twice: Y, Y, V, V, U, U */
+ src_surf_base[0] = adaptor_priv->YBufOffset;
+ src_surf_base[1] = adaptor_priv->YBufOffset;
+ src_surf_base[2] = adaptor_priv->VBufOffset;
+ src_surf_base[3] = adaptor_priv->VBufOffset;
+ src_surf_base[4] = adaptor_priv->UBufOffset;
+ src_surf_base[5] = adaptor_priv->UBufOffset;
+
+ if (is_planar_fourcc(id)) {
+ /* planar: six R8 surfaces, chroma ones at half width and height */
+ src_surf_format = BRW_SURFACEFORMAT_R8_UNORM;
+ src_width[1] = src_width[0] = width;
+ src_height[1] = src_height[0] = height;
+ src_pitch[1] = src_pitch[0] = video_pitch2;
+ src_width[4] = src_width[5] = src_width[2] = src_width[3] =
+ width / 2;
+ src_height[4] = src_height[5] = src_height[2] = src_height[3] =
+ height / 2;
+ src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
+ video_pitch;
+ n_src_surf = 6;
+ } else {
+ /* packed: a single surface; only index 0 of the arrays is filled */
+ if (id == FOURCC_UYVY)
+ src_surf_format = BRW_SURFACEFORMAT_YCRCB_SWAPY;
+ else
+ src_surf_format = BRW_SURFACEFORMAT_YCRCB_NORMAL;
+
+ src_width[0] = width;
+ src_height[0] = height;
+ src_pitch[0] = video_pitch;
+ n_src_surf = 1;
+ }
+
+ /*
+ * One buffer for everything: (n_src_surf + 1) 32-byte-aligned surface
+ * states plus one uint32_t per surface (presumably the binding table
+ * entries written by i965_create_binding_table - TODO confirm).
+ */
+ surface_state_binding_table_bo =
+ drm_intel_bo_alloc(intel->bufmgr,
+ "surface state & binding table",
+ (n_src_surf + 1) * (ALIGN(sizeof(struct brw_surface_state), 32) + sizeof(uint32_t)),
+ 4096);
+
+ if (!surface_state_binding_table_bo)
+ return;
+
+ /* destination surface state goes at offset 0, sources follow it */
+ i965_create_dst_surface_state(scrn, pixmap, surface_state_binding_table_bo, 0);
+
+ for (src_surf = 0; src_surf < n_src_surf; src_surf++) {
+ i965_create_src_surface_state(scrn,
+ adaptor_priv->buf,
+ src_surf_base[src_surf],
+ src_width[src_surf],
+ src_height[src_surf],
+ src_pitch[src_surf],
+ src_surf_format,
+ surface_state_binding_table_bo,
+ (src_surf + 1) * ALIGN(sizeof(struct brw_surface_state), 32));
+ }
+
+ i965_create_binding_table(scrn, surface_state_binding_table_bo, n_src_surf + 1);
+
+ if (!gen6_create_vidoe_objects(scrn)) {
+ drm_intel_bo_unreference(surface_state_binding_table_bo);
+ return;
+ }
+
+ /* Set up the offset for translating from the given region (in screen
+ * coordinates) to the backing pixmap.
+ */
+#ifdef COMPOSITE
+ pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
+ pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
+#else
+ pix_xoff = 0;
+ pix_yoff = 0;
+#endif
+
+ dxo = dstRegion->extents.x1;
+ dyo = dstRegion->extents.y1;
+
+ /* Use normalized texture coordinates */
+ src_scale_x = ((float)src_w / width) / (float)drw_w;
+ src_scale_y = ((float)src_h / height) / (float)drw_h;
+
+ pbox = REGION_RECTS(dstRegion);
+ nbox = REGION_NUM_RECTS(dstRegion);
+ while (nbox--) {
+ int box_x1 = pbox->x1;
+ int box_y1 = pbox->y1;
+ int box_x2 = pbox->x2;
+ int box_y2 = pbox->y2;
+ int i;
+ drm_intel_bo *vb_bo;
+ float *vb;
+ /* every buffer the batch references, for the aperture check below */
+ drm_intel_bo *bo_table[] = {
+ NULL, /* vb_bo */
+ intel->batch_bo,
+ surface_state_binding_table_bo,
+ intel->video.gen4_sampler_bo,
+ intel->video.wm_prog_packed_bo,
+ intel->video.wm_prog_planar_bo,
+ intel->video.gen4_cc_vp_bo,
+ intel->video.gen4_cc_bo,
+ intel->video.gen6_blend_bo,
+ intel->video.gen6_depth_stencil_bo,
+ };
+
+ pbox++;
+
+ if (intel_alloc_and_map(intel, "textured video vb", 4096,
+ &vb_bo, &vb) != 0)
+ break;
+ bo_table[0] = vb_bo;
+
+ /*
+ * Three vertices for the corners (x2,y2), (x1,y2), (x1,y1). Each
+ * vertex is four floats: scaled source s, t followed by pixmap x, y.
+ * NOTE(review): the vertex element setup comments label offset 0 as
+ * "X,Y" and offset 8 as "S0, T0" - confirm which labeling is intended.
+ */
+ i = 0;
+ vb[i++] = (box_x2 - dxo) * src_scale_x;
+ vb[i++] = (box_y2 - dyo) * src_scale_y;
+ vb[i++] = (float)box_x2 + pix_xoff;
+ vb[i++] = (float)box_y2 + pix_yoff;
+
+ vb[i++] = (box_x1 - dxo) * src_scale_x;
+ vb[i++] = (box_y2 - dyo) * src_scale_y;
+ vb[i++] = (float)box_x1 + pix_xoff;
+ vb[i++] = (float)box_y2 + pix_yoff;
+
+ vb[i++] = (box_x1 - dxo) * src_scale_x;
+ vb[i++] = (box_y1 - dyo) * src_scale_y;
+ vb[i++] = (float)box_x1 + pix_xoff;
+ vb[i++] = (float)box_y1 + pix_yoff;
+
+ drm_intel_bo_unmap(vb_bo);
+
+ /* If this command won't fit in the current batch, flush.
+ * Assume that it does after being flushed.
+ */
+ if (drm_intel_bufmgr_check_aperture_space(bo_table, ARRAY_SIZE(bo_table)) < 0)
+ intel_batch_submit(scrn, FALSE);
+
+ /* the full pipeline setup is re-emitted for every box */
+ intel_batch_start_atomic(scrn, 200);
+ gen6_emit_video_setup(scrn, surface_state_binding_table_bo, n_src_surf);
+
+ /* Set up the pointer to our vertex buffer */
+ OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | (5 - 2));
+ /* four 32-bit floats per vertex */
+ OUT_BATCH((0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
+ GEN6_VB0_VERTEXDATA |
+ ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
+ OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
+ /* second relocation is offset by the i * 4 bytes of vertex data written */
+ OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, i * 4);
+ OUT_BATCH(0); /* reserved */
+
+ OUT_BATCH(BRW_3DPRIMITIVE |
+ BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
+ (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
+ (0 << 9) | /* Internal Vertex Count */
+ (6 - 2));
+ OUT_BATCH(3); /* vertex count per instance */
+ OUT_BATCH(0); /* start vertex offset */
+ OUT_BATCH(1); /* single instance */
+ OUT_BATCH(0); /* start instance location */
+ OUT_BATCH(0); /* index buffer offset, ignored */
+
+ intel_batch_end_atomic(scrn);
+ /* drop our local reference to the vertex buffer */
+ drm_intel_bo_unreference(vb_bo);
+ }
+
+ /* release reference once we're finished */
+ drm_intel_bo_unreference(surface_state_binding_table_bo);
+ intel_debug_flush(scrn);
 }
diff --git a/src/intel.h b/src/intel.h
index 7604eee..eb68cb1 100644
--- a/src/intel.h
+++ b/src/intel.h
@@ -377,6 +377,10 @@ typedef struct intel_screen_private {
drm_intel_bo *gen4_cc_vp_bo;
drm_intel_bo *gen4_sampler_bo;
drm_intel_bo *gen4_sip_kernel_bo;
+ drm_intel_bo *wm_prog_packed_bo;
+ drm_intel_bo *wm_prog_planar_bo;
+ drm_intel_bo *gen6_blend_bo;
+ drm_intel_bo *gen6_depth_stencil_bo;
} video;
/* Render accel state */
diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
index e7ca69d..01cb193 100644
--- a/src/intel_batchbuffer.c
+++ b/src/intel_batchbuffer.c
@@ -38,6 +38,7 @@
#include "intel.h"
#include "i830_reg.h"
#include "i915_drm.h"
+#include "i965_reg.h"
#define DUMP_BATCHBUFFERS NULL /* "/tmp/i915-batchbuffers.dump" */
@@ -146,14 +147,26 @@ void intel_batch_emit_flush(ScrnInfoPtr scrn)
assert (!intel->in_batch_atomic);
- /* Big hammer, look to the pipelined flushes in future. */
- flags = MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE;
- if (INTEL_INFO(intel)->gen >= 40)
+ if ((INTEL_INFO(intel)->gen >= 60)) {
+ BEGIN_BATCH(4);
+ OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2));
+ OUT_BATCH(BRW_PIPE_CONTROL_IS_FLUSH |
+ BRW_PIPE_CONTROL_WC_FLUSH |
+ BRW_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ BRW_PIPE_CONTROL_NOWRITE);
+ OUT_BATCH(0); /* write address */
+ OUT_BATCH(0); /* write data */
+ ADVANCE_BATCH();
+ } else {
+ /* Big hammer, look to the pipelined flushes in future. */
+ flags = MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE;
+ if (INTEL_INFO(intel)->gen >= 40)
flags = 0;
- BEGIN_BATCH(1);
- OUT_BATCH(MI_FLUSH | flags);
- ADVANCE_BATCH();
+ BEGIN_BATCH(1);
+ OUT_BATCH(MI_FLUSH | flags);
+ ADVANCE_BATCH();
+ }
intel_batch_do_flush(scrn);
}
diff --git a/src/intel_video.h b/src/intel_video.h
index 5920d30..f405d40 100644
--- a/src/intel_video.h
+++ b/src/intel_video.h
@@ -81,6 +81,13 @@ void I965DisplayVideoTextured(ScrnInfoPtr scrn,
short src_w, short src_h,
short drw_w, short drw_h, PixmapPtr pixmap);
+/*
+ * GEN6 counterpart of I965DisplayVideoTextured(), taking the same
+ * parameter list.
+ */
+void Gen6DisplayVideoTextured(ScrnInfoPtr scrn,
+ intel_adaptor_private *adaptor_priv,
+ int id, RegionPtr dstRegion, short width,
+ short height, int video_pitch, int video_pitch2,
+ short src_w, short src_h,
+ short drw_w, short drw_h, PixmapPtr pixmap);
+
void i965_free_video(ScrnInfoPtr scrn);
int is_planar_fourcc(int id);
--
1.7.0.4
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATH v2 5/6] Xv: enable TextureAdaptor for Sandybridge
2010-10-27 2:17 [PATH v2 0/6] Xv on Sandybridge Xiang, Haihao
` (3 preceding siblings ...)
2010-10-27 2:17 ` [PATH v2 4/6] Xv: setup pipeline for Xv " Xiang, Haihao
@ 2010-10-27 2:17 ` Xiang, Haihao
2010-10-27 2:17 ` [PATH v2 6/6] Xv: don't call intel_wait_for_scanline on Sandybridge Xiang, Haihao
5 siblings, 0 replies; 9+ messages in thread
From: Xiang, Haihao @ 2010-10-27 2:17 UTC (permalink / raw)
To: intel-gfx
Signed-off-by: Xiang, Haihao <haihao.xiang@intel.com>
---
src/intel_video.c | 8 ++++++--
1 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/src/intel_video.c b/src/intel_video.c
index 5d16778..afc2405 100644
--- a/src/intel_video.c
+++ b/src/intel_video.c
@@ -364,7 +364,6 @@ void I830InitVideo(ScreenPtr screen)
*/
if (scrn->bitsPerPixel >= 16 &&
INTEL_INFO(intel)->gen >= 30 &&
- INTEL_INFO(intel)->gen < 60 &&
!intel->use_shadow) {
texturedAdaptor = I830SetupImageVideoTextured(screen);
if (texturedAdaptor != NULL) {
@@ -1583,7 +1582,12 @@ I830PutImageTextured(ScrnInfoPtr scrn,
intel_wait_for_scanline(scrn, pixmap, crtc, clipBoxes);
}
- if (INTEL_INFO(intel)->gen >= 40) {
+ if (INTEL_INFO(intel)->gen >= 60) {
+ Gen6DisplayVideoTextured(scrn, adaptor_priv, id, clipBoxes,
+ width, height, dstPitch, dstPitch2,
+ src_w, src_h,
+ drw_w, drw_h, pixmap);
+ } else if (INTEL_INFO(intel)->gen >= 40) {
I965DisplayVideoTextured(scrn, adaptor_priv, id, clipBoxes,
width, height, dstPitch, dstPitch2,
src_w, src_h,
--
1.7.0.4
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATH v2 6/6] Xv: don't call intel_wait_for_scanline on Sandybridge
2010-10-27 2:17 [PATH v2 0/6] Xv on Sandybridge Xiang, Haihao
` (4 preceding siblings ...)
2010-10-27 2:17 ` [PATH v2 5/6] Xv: enable TextureAdaptor for Sandybridge Xiang, Haihao
@ 2010-10-27 2:17 ` Xiang, Haihao
2010-10-29 6:42 ` Kenneth Graunke
5 siblings, 1 reply; 9+ messages in thread
From: Xiang, Haihao @ 2010-10-27 2:17 UTC (permalink / raw)
To: intel-gfx
MI_LOAD_SCAN_LINE_INCL command is not available on sandybridge.
Signed-off-by: Xiang, Haihao <haihao.xiang@intel.com>
---
src/intel_video.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/src/intel_video.c b/src/intel_video.c
index afc2405..cdff149 100644
--- a/src/intel_video.c
+++ b/src/intel_video.c
@@ -1578,7 +1578,7 @@ I830PutImageTextured(ScrnInfoPtr scrn,
return BadAlloc;
}
- if (crtc && adaptor_priv->SyncToVblank != 0) {
+ if (crtc && adaptor_priv->SyncToVblank != 0 && INTEL_INFO(intel)->gen < 60) {
intel_wait_for_scanline(scrn, pixmap, crtc, clipBoxes);
}
--
1.7.0.4
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [PATH v2 6/6] Xv: don't call intel_wait_for_scanline on Sandybridge
2010-10-27 2:17 ` [PATH v2 6/6] Xv: don't call intel_wait_for_scanline on Sandybridge Xiang, Haihao
@ 2010-10-29 6:42 ` Kenneth Graunke
2010-10-29 8:20 ` [PATH v2 6/6] Xv: dont " Chris Wilson
0 siblings, 1 reply; 9+ messages in thread
From: Kenneth Graunke @ 2010-10-29 6:42 UTC (permalink / raw)
To: intel-gfx
On Tuesday 26 October 2010 19:17:33 Xiang, Haihao wrote:
> MI_LOAD_SCAN_LINE_INCL command is not available on sandybridge.
>
> Signed-off-by: Xiang, Haihao <haihao.xiang@intel.com>
> ---
> src/intel_video.c | 2 +-
> 1 files changed, 1 insertions(+), 1 deletions(-)
>
> diff --git a/src/intel_video.c b/src/intel_video.c
> index afc2405..cdff149 100644
> --- a/src/intel_video.c
> +++ b/src/intel_video.c
> @@ -1578,7 +1578,7 @@ I830PutImageTextured(ScrnInfoPtr scrn,
> return BadAlloc;
> }
>
> - if (crtc && adaptor_priv->SyncToVblank != 0) {
> + if (crtc && adaptor_priv->SyncToVblank != 0 && INTEL_INFO(intel)->gen <
> 60) { intel_wait_for_scanline(scrn, pixmap, crtc, clipBoxes);
> }
I think you mean < 6 here (not 60)?
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: [PATH v2 6/6] Xv: dont call intel_wait_for_scanline on Sandybridge
2010-10-29 6:42 ` Kenneth Graunke
@ 2010-10-29 8:20 ` Chris Wilson
0 siblings, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2010-10-29 8:20 UTC (permalink / raw)
To: Kenneth Graunke, intel-gfx
On Thu, 28 Oct 2010 23:42:05 -0700, Kenneth Graunke <kenneth@whitecape.org> wrote:
> On Tuesday 26 October 2010 19:17:33 Xiang, Haihao wrote:
> > MI_LOAD_SCAN_LINE_INCL command is not available on sandybridge.
> >
> > Signed-off-by: Xiang, Haihao <haihao.xiang@intel.com>
> > ---
> > src/intel_video.c | 2 +-
> > 1 files changed, 1 insertions(+), 1 deletions(-)
> >
> > diff --git a/src/intel_video.c b/src/intel_video.c
> > index afc2405..cdff149 100644
> > --- a/src/intel_video.c
> > +++ b/src/intel_video.c
> > @@ -1578,7 +1578,7 @@ I830PutImageTextured(ScrnInfoPtr scrn,
> > return BadAlloc;
> > }
> >
> > - if (crtc && adaptor_priv->SyncToVblank != 0) {
> > + if (crtc && adaptor_priv->SyncToVblank != 0 && INTEL_INFO(intel)->gen <
> > 60) { intel_wait_for_scanline(scrn, pixmap, crtc, clipBoxes);
> > }
>
> I think you mean < 6 here (not 60)?
No, 60. It's generation*10 + minor revision, just so that g33 and g45
could be easily distinguished from their gen3 and gen4 brethren.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 9+ messages in thread