Igt-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [igt-dev] [PATCH i-g-t] lib/gen6_render: Refactoring lib
@ 2018-04-09 15:42 Lukasz Kalamarz
  2018-04-09 16:49 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
                   ` (15 more replies)
  0 siblings, 16 replies; 28+ messages in thread
From: Lukasz Kalamarz @ 2018-04-09 15:42 UTC (permalink / raw)
  To: igt-dev

This patch starts a series of refactoring changes for the *render*
libs. A lot of code in those libraries is copy/pasted and renamed for
different gens.

Changes made in this patch:
- remove duplicated register definitions
- move field definitions next to their register definition
- sort register definitions into ascending order
- unify spacing between a register name and its address/value

Signed-off-by: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
Cc: Katarzyna Dec <katarzyna.dec@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 lib/gen6_render.h | 1598 ++++++++++++++++++++++-------------------------------
 1 file changed, 666 insertions(+), 932 deletions(-)

diff --git a/lib/gen6_render.h b/lib/gen6_render.h
index 8a4ec53..7d658aa 100644
--- a/lib/gen6_render.h
+++ b/lib/gen6_render.h
@@ -9,23 +9,94 @@
 					   ((Subopcode) << 16))
 
 #define GEN6_STATE_BASE_ADDRESS			GEN6_3D(0, 1, 1)
-#define GEN6_STATE_SIP				GEN6_3D(0, 1, 2)
+# define BUFFER_SIZE_MODIFY			       (1 << 0)
+# define BASE_ADDRESS_MODIFY			       (1 << 0)
 
+#define GEN6_STATE_SIP				GEN6_3D(0, 1, 2)
 #define GEN6_PIPELINE_SELECT			GEN6_3D(1, 1, 4)
+# define PIPELINE_SELECT_3D				0
+# define PIPELINE_SELECT_MEDIA				1
 
 #define GEN6_MEDIA_STATE_POINTERS		GEN6_3D(2, 0, 0)
 #define GEN6_MEDIA_OBJECT			GEN6_3D(2, 1, 0)
 
 #define GEN6_3DSTATE_BINDING_TABLE_POINTERS	GEN6_3D(3, 0, 1)
-# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS  (1 << 12)/* for GEN6 */
-# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS  (1 << 9) /* for GEN6 */
-# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS  (1 << 8) /* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS          (1 << 12)/* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS          (1 << 9) /* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS          (1 << 8) /* for GEN6 */
+
+#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS	GEN6_3D(3, 0, 2)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS	       (1 << 12)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS	       (1 << 9)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS	       (1 << 8)
+
+#define GEN6_3DSTATE_URB			GEN6_3D(3, 0, 5)
+/* DW1 */
+# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT		16
+# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT	0
+/* DW2 */
+# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT	8
+# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT		0
 
 #define GEN6_3DSTATE_VERTEX_BUFFERS		GEN6_3D(3, 0, 8)
 #define GEN6_3DSTATE_VERTEX_ELEMENTS		GEN6_3D(3, 0, 9)
 #define GEN6_3DSTATE_INDEX_BUFFER		GEN6_3D(3, 0, 0xa)
 #define GEN6_3DSTATE_VF_STATISTICS		GEN6_3D(1, 0, 0xb)
 
+#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS	GEN6_3D(3, 0, 0xd)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC	       (1 << 12)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF	       (1 << 11)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP       (1 << 10)
+
+#define GEN6_3DSTATE_CC_STATE_POINTERS		GEN6_3D(3, 0, 0xe)
+#define GEN6_3DSTATE_VS				GEN6_3D(3, 0, 0x10)
+#define GEN6_3DSTATE_GS				GEN6_3D(3, 0, 0x11)
+/* DW4 */
+# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT	0
+
+#define GEN6_3DSTATE_CLIP			GEN6_3D(3, 0, 0x12)
+#define GEN6_3DSTATE_SF				GEN6_3D(3, 0, 0x13)
+/* DW1 */
+# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT			22
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT		11
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT		4
+/* DW2 */
+/* DW3 */
+# define GEN6_3DSTATE_SF_CULL_BOTH				(0 << 29)
+# define GEN6_3DSTATE_SF_CULL_NONE				(1 << 29)
+# define GEN6_3DSTATE_SF_CULL_FRONT				(2 << 29)
+# define GEN6_3DSTATE_SF_CULL_BACK				(3 << 29)
+/* DW4 */
+# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT			29
+# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT			27
+# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT			25
+# define GEN6_3DSTATE_SF_VERTEX_SUB_PIXEL_PRECISION_SHIFT	12
+
+#define GEN6_3DSTATE_WM				GEN6_3D(3, 0, 0x14)
+/* DW2 */
+# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT			27
+# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
+/* DW4 */
+# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT		16
+/* DW5 */
+# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT			25
+# define GEN6_3DSTATE_WM_DISPATCH_ENABLE			(1 << 19)
+# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE			(1 << 1)
+# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE			(1 << 0)
+/* DW6 */
+# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT			20
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC	(1 << 15)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 14)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC	(1 << 13)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC		(1 << 12)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 11)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC		(1 << 10)
+
+#define GEN6_3DSTATE_CONSTANT_VS		GEN6_3D(3, 0, 0x15)
+#define GEN6_3DSTATE_CONSTANT_GS		GEN6_3D(3, 0, 0x16)
+#define GEN6_3DSTATE_CONSTANT_PS		GEN6_3D(3, 0, 0x17)
+#define GEN6_3DSTATE_SAMPLE_MASK		GEN6_3D(3, 0, 0x18)
+
 #define GEN6_3DSTATE_DRAWING_RECTANGLE		GEN6_3D(3, 1, 0)
 #define GEN6_3DSTATE_CONSTANT_COLOR		GEN6_3D(3, 1, 1)
 #define GEN6_3DSTATE_SAMPLER_PALETTE_LOAD	GEN6_3D(3, 1, 2)
@@ -34,93 +105,14 @@
 # define GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT	29
 # define GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT	18
 
-#define GEN6_3DSTATE_POLY_STIPPLE_OFFSET		GEN6_3D(3, 1, 6)
+#define GEN6_3DSTATE_POLY_STIPPLE_OFFSET	GEN6_3D(3, 1, 6)
 #define GEN6_3DSTATE_POLY_STIPPLE_PATTERN	GEN6_3D(3, 1, 7)
 #define GEN6_3DSTATE_LINE_STIPPLE		GEN6_3D(3, 1, 8)
 #define GEN6_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP	GEN6_3D(3, 1, 9)
 /* These two are BLC and CTG only, not BW or CL */
 #define GEN6_3DSTATE_AA_LINE_PARAMS		GEN6_3D(3, 1, 0xa)
 #define GEN6_3DSTATE_GS_SVB_INDEX		GEN6_3D(3, 1, 0xb)
-#define GEN6_3DSTATE_MONOFILTER_SIZE		GEN6_3D(3, 1, 0x11)
-#define GEN6_3DPRIMITIVE				GEN6_3D(3, 3, 0)
-
-#define GEN6_3DSTATE_CLEAR_PARAMS		GEN6_3D(3, 1, 0x10)
-/* DW1 */
-# define GEN6_3DSTATE_DEPTH_CLEAR_VALID		(1 << 15)
-
-#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS	GEN6_3D(3, 0, 0x02)
-# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS	(1 << 12)
-# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS	(1 << 9)
-# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS	(1 << 8)
-
-#define GEN6_3DSTATE_URB			GEN6_3D(3, 0, 0x05)
-/* DW1 */
-# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT		16
-# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT	0
-/* DW2 */
-# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT	8
-# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT		0
-
-#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS	GEN6_3D(3, 0, 0x0d)
-# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC		(1 << 12)
-# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF		(1 << 11)
-# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP	(1 << 10)
-
-#define GEN6_3DSTATE_CC_STATE_POINTERS		GEN6_3D(3, 0, 0x0e)
-
-#define GEN6_3DSTATE_VS				GEN6_3D(3, 0, 0x10)
-
-#define GEN6_3DSTATE_GS				GEN6_3D(3, 0, 0x11)
-/* DW4 */
-# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT	0
-
-#define GEN6_3DSTATE_CLIP			GEN6_3D(3, 0, 0x12)
-
-#define GEN6_3DSTATE_SF				GEN6_3D(3, 0, 0x13)
-/* DW1 */
-# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT		22
-# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT	11
-# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT	4
-/* DW2 */
-/* DW3 */
-# define GEN6_3DSTATE_SF_CULL_BOTH			(0 << 29)
-# define GEN6_3DSTATE_SF_CULL_NONE			(1 << 29)
-# define GEN6_3DSTATE_SF_CULL_FRONT			(2 << 29)
-# define GEN6_3DSTATE_SF_CULL_BACK			(3 << 29)
-/* DW4 */
-# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT		29
-# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT		27
-# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT		25
-# define GEN6_3DSTATE_SF_VERTEX_SUB_PIXEL_PRECISION_SHIFT 12
-
-#define GEN6_3DSTATE_WM				GEN6_3D(3, 0, 0x14)
-/* DW2 */
-# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT			27
-# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
-/* DW4 */
-# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT		16
-/* DW5 */
-# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT			25
-# define GEN6_3DSTATE_WM_DISPATCH_ENABLE			(1 << 19)
-# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE			(1 << 1)
-# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE			(1 << 0)
-/* DW6 */
-# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT			20
-# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC	(1 << 15)
-# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 14)
-# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC	(1 << 13)
-# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC		(1 << 12)
-# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 11)
-# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC		(1 << 10)
-
-
-#define GEN6_3DSTATE_CONSTANT_VS		GEN6_3D(3, 0, 0x15)
-#define GEN6_3DSTATE_CONSTANT_GS		GEN6_3D(3, 0, 0x16)
-#define GEN6_3DSTATE_CONSTANT_PS		GEN6_3D(3, 0, 0x17)
-
-#define GEN6_3DSTATE_SAMPLE_MASK		GEN6_3D(3, 0, 0x18)
-
-#define GEN6_3DSTATE_MULTISAMPLE		GEN6_3D(3, 1, 0x0d)
+#define GEN6_3DSTATE_MULTISAMPLE		GEN6_3D(3, 1, 0xd)
 /* DW1 */
 # define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER		(0 << 4)
 # define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT	(1 << 4)
@@ -128,648 +120,394 @@
 # define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4			(2 << 1)
 # define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8			(3 << 1)
 
-#define PIPELINE_SELECT_3D		0
-#define PIPELINE_SELECT_MEDIA		1
+#define GEN6_3DSTATE_CLEAR_PARAMS		GEN6_3D(3, 1, 0x10)
+/* DW1 */
+# define GEN6_3DSTATE_DEPTH_CLEAR_VALID		(1 << 15)
 
-/* for GEN6_STATE_BASE_ADDRESS */
-#define BASE_ADDRESS_MODIFY		(1 << 0)
-
-/* for GEN6_PIPE_CONTROL */
-#define GEN6_PIPE_CONTROL_NOWRITE       (0 << 14)
-#define GEN6_PIPE_CONTROL_WRITE_QWORD   (1 << 14)
-#define GEN6_PIPE_CONTROL_WRITE_DEPTH   (2 << 14)
-#define GEN6_PIPE_CONTROL_WRITE_TIME    (3 << 14)
-#define GEN6_PIPE_CONTROL_DEPTH_STALL   (1 << 13)
-#define GEN6_PIPE_CONTROL_WC_FLUSH      (1 << 12)
-#define GEN6_PIPE_CONTROL_IS_FLUSH      (1 << 11)
-#define GEN6_PIPE_CONTROL_TC_FLUSH      (1 << 10)
-#define GEN6_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
-#define GEN6_PIPE_CONTROL_GLOBAL_GTT    (1 << 2)
-#define GEN6_PIPE_CONTROL_LOCAL_PGTT    (0 << 2)
-#define GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH	(1 << 0)
-
-/* VERTEX_BUFFER_STATE Structure */
-#define VB0_BUFFER_INDEX_SHIFT	26
-#define VB0_VERTEXDATA			(0 << 20)
-#define VB0_INSTANCEDATA		(1 << 20)
-#define VB0_BUFFER_PITCH_SHIFT		0
-#define VB0_NULL_VERTEX_BUFFER          (1 << 13)
-
-/* VERTEX_ELEMENT_STATE Structure */
-#define VE0_VERTEX_BUFFER_INDEX_SHIFT	26 /* for GEN6 */
-#define VE0_VALID			(1 << 25) /* for GEN6 */
-#define VE0_FORMAT_SHIFT		16
-#define VE0_OFFSET_SHIFT		0
-#define VE1_VFCOMPONENT_0_SHIFT		28
-#define VE1_VFCOMPONENT_1_SHIFT		24
-#define VE1_VFCOMPONENT_2_SHIFT		20
-#define VE1_VFCOMPONENT_3_SHIFT		16
-#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT	0
-
-/* 3DPRIMITIVE bits */
-#define GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
-#define GEN6_3DPRIMITIVE_VERTEX_RANDOM	  (1 << 15)
-/* Primitive types are in gen6_defines.h */
-#define GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT	  10
-
-#define GEN6_SVG_CTL		       0x7400
-
-#define GEN6_SVG_CTL_GS_BA	       (0 << 8)
-#define GEN6_SVG_CTL_SS_BA	       (1 << 8)
-#define GEN6_SVG_CTL_IO_BA	       (2 << 8)
-#define GEN6_SVG_CTL_GS_AUB	       (3 << 8)
-#define GEN6_SVG_CTL_IO_AUB	       (4 << 8)
-#define GEN6_SVG_CTL_SIP		       (5 << 8)
-
-#define GEN6_SVG_RDATA		       0x7404
-#define GEN6_SVG_WORK_CTL	       0x7408
-
-#define GEN6_VF_CTL		       0x7500
-
-#define GEN6_VF_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
-#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID	   (0 << 8)
-#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG	   (1 << 8)
-#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE   (0 << 4)
-#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX	   (1 << 4)
-#define GEN6_VF_CTL_SKIP_INITIAL_PRIMITIVES	   (1 << 3)
-#define GEN6_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE	   (1 << 2)
-#define GEN6_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE	   (1 << 1)
-#define GEN6_VF_CTL_SNAPSHOT_ENABLE	     	   (1 << 0)
-
-#define GEN6_VF_STRG_VAL		       0x7504
-#define GEN6_VF_STR_VL_OVR	       0x7508
-#define GEN6_VF_VC_OVR		       0x750c
-#define GEN6_VF_STR_PSKIP	       0x7510
-#define GEN6_VF_MAX_PRIM		       0x7514
-#define GEN6_VF_RDATA		       0x7518
-
-#define GEN6_VS_CTL		       0x7600
-#define GEN6_VS_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
-#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_0	   (0 << 8)
-#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_1	   (1 << 8)
-#define GEN6_VS_CTL_SNAPSHOT_MUX_VALID_COUNT	   (2 << 8)
-#define GEN6_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER  (3 << 8)
-#define GEN6_VS_CTL_SNAPSHOT_ALL_THREADS		   (1 << 2)
-#define GEN6_VS_CTL_THREAD_SNAPSHOT_ENABLE	   (1 << 1)
-#define GEN6_VS_CTL_SNAPSHOT_ENABLE		   (1 << 0)
-
-#define GEN6_VS_STRG_VAL		       0x7604
-#define GEN6_VS_RDATA		       0x7608
-
-#define GEN6_SF_CTL		       0x7b00
-#define GEN6_SF_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID	   (0 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID	   (2 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID	   (4 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT	   (6 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER  (7 << 8)
-#define GEN6_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE  (1 << 4)
-#define GEN6_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE	   (1 << 3)
-#define GEN6_SF_CTL_SNAPSHOT_ALL_THREADS		   (1 << 2)
-#define GEN6_SF_CTL_THREAD_SNAPSHOT_ENABLE	   (1 << 1)
-#define GEN6_SF_CTL_SNAPSHOT_ENABLE		   (1 << 0)
-
-#define GEN6_SF_STRG_VAL		       0x7b04
-#define GEN6_SF_RDATA		       0x7b18
-
-#define GEN6_WIZ_CTL		       0x7c00
-#define GEN6_WIZ_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
-#define GEN6_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT	   16
-#define GEN6_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER   (0 << 8)
-#define GEN6_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE     (1 << 8)
-#define GEN6_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE   (2 << 8)
-#define GEN6_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH	      (1 << 6)
-#define GEN6_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS    (1 << 5)
-#define GEN6_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE   (1 << 4)
-#define GEN6_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG	      (1 << 3)
-#define GEN6_WIZ_CTL_SNAPSHOT_ALL_THREADS	      (1 << 2)
-#define GEN6_WIZ_CTL_THREAD_SNAPSHOT_ENABLE	      (1 << 1)
-#define GEN6_WIZ_CTL_SNAPSHOT_ENABLE		      (1 << 0)
-
-#define GEN6_WIZ_STRG_VAL			      0x7c04
-#define GEN6_WIZ_RDATA				      0x7c18
-
-#define GEN6_TS_CTL		       0x7e00
-#define GEN6_TS_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
-#define GEN6_TS_CTL_SNAPSHOT_MESSAGE_ERROR	   (0 << 8)
-#define GEN6_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR   (3 << 8)
-#define GEN6_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS	   (1 << 2)
-#define GEN6_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS  	   (1 << 1)
-#define GEN6_TS_CTL_SNAPSHOT_ENABLE		   (1 << 0)
-
-#define GEN6_TS_STRG_VAL		       0x7e04
-#define GEN6_TS_RDATA		       0x7e08
-
-#define GEN6_TD_CTL_MUX_SHIFT	       8
-#define GEN6_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH	   (1 << 7)
-#define GEN6_TD_CTL_FORCE_EXTERNAL_HALT		   (1 << 6)
-#define GEN6_TD_CTL_EXCEPTION_MASK_OVERRIDE	   (1 << 5)
-#define GEN6_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE  (1 << 4)
-#define GEN6_TD_CTL_BREAKPOINT_ENABLE		   (1 << 2)
-#define GEN6_TD_CTL2		       0x8004
-#define GEN6_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
-#define GEN6_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE      (1 << 26)
-#define GEN6_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE	      (1 << 25)
-#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT	      16
-#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE	      (1 << 8)
-#define GEN6_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
-#define GEN6_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE	      (1 << 6)
-#define GEN6_TD_CTL2_SF_EXECUTION_MASK_ENABLE	      (1 << 5)
-#define GEN6_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE     (1 << 4)
-#define GEN6_TD_CTL2_GS_EXECUTION_MASK_ENABLE	      (1 << 3)
-#define GEN6_TD_CTL2_VS_EXECUTION_MASK_ENABLE	      (1 << 0)
-#define GEN6_TD_VF_VS_EMSK	       0x8008
-#define GEN6_TD_GS_EMSK		       0x800c
-#define GEN6_TD_CLIP_EMSK	       0x8010
-#define GEN6_TD_SF_EMSK		       0x8014
-#define GEN6_TD_WIZ_EMSK		       0x8018
-#define GEN6_TD_0_6_EHTRG_VAL	       0x801c
-#define GEN6_TD_0_7_EHTRG_VAL	       0x8020
-#define GEN6_TD_0_6_EHTRG_MSK           0x8024
-#define GEN6_TD_0_7_EHTRG_MSK	       0x8028
-#define GEN6_TD_RDATA		       0x802c
-#define GEN6_TD_TS_EMSK		       0x8030
-
-#define GEN6_EU_CTL		       0x8800
-#define GEN6_EU_CTL_SELECT_SHIFT	       16
-#define GEN6_EU_CTL_DATA_MUX_SHIFT      8
-#define GEN6_EU_ATT_0		       0x8810
-#define GEN6_EU_ATT_1		       0x8814
-#define GEN6_EU_ATT_DATA_0	       0x8820
-#define GEN6_EU_ATT_DATA_1	       0x8824
-#define GEN6_EU_ATT_CLR_0	       0x8830
-#define GEN6_EU_ATT_CLR_1	       0x8834
-#define GEN6_EU_RDATA		       0x8840
+#define GEN6_3DSTATE_MONOFILTER_SIZE		GEN6_3D(3, 1, 0x11)
 
 #define GEN6_PIPE_CONTROL			GEN6_3D(3, 2, 0)
+# define GEN6_PIPE_CONTROL_NOWRITE		(0 << 14)
+# define GEN6_PIPE_CONTROL_WRITE_QWORD		(1 << 14)
+# define GEN6_PIPE_CONTROL_WRITE_DEPTH		(2 << 14)
+# define GEN6_PIPE_CONTROL_WRITE_TIME    	(3 << 14)
+# define GEN6_PIPE_CONTROL_DEPTH_STALL   	(1 << 13)
+# define GEN6_PIPE_CONTROL_WC_FLUSH      	(1 << 12)
+# define GEN6_PIPE_CONTROL_IS_FLUSH      	(1 << 11)
+# define GEN6_PIPE_CONTROL_TC_FLUSH      	(1 << 10)
+# define GEN6_PIPE_CONTROL_NOTIFY_ENABLE 	(1 << 8)
+# define GEN6_PIPE_CONTROL_GLOBAL_GTT    	(1 << 2)
+# define GEN6_PIPE_CONTROL_LOCAL_PGTT    	(0 << 2)
+# define GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH	(1 << 0)
 
-#define GEN6_3DPRIMITIVE				GEN6_3D(3, 3, 0)
+#define GEN6_3DPRIMITIVE			GEN6_3D(3, 3, 0)
+# define GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL 	(0 << 15)
+# define GEN6_3DPRIMITIVE_VERTEX_RANDOM	  	(1 << 15)
+/* Primitive types are in gen6_defines.h */
+# define GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT	10
 
-#define GEN6_3DSTATE_CLEAR_PARAMS		GEN6_3D(3, 1, 0x10)
-/* DW1 */
-# define GEN6_3DSTATE_DEPTH_CLEAR_VALID		(1 << 15)
+/* VERTEX_BUFFER_STATE Structure */
+# define VB0_BUFFER_INDEX_SHIFT	26
+# define VB0_VERTEXDATA				(0 << 20)
+# define VB0_INSTANCEDATA			(1 << 20)
+# define VB0_BUFFER_PITCH_SHIFT			0
+# define VB0_NULL_VERTEX_BUFFER          	(1 << 13)
 
-/* for GEN6+ */
-#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS	GEN6_3D(3, 0, 0x02)
-# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS	(1 << 12)
-# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS	(1 << 9)
-# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS	(1 << 8)
+/* VERTEX_ELEMENT_STATE Structure */
+# define VE0_VERTEX_BUFFER_INDEX_SHIFT		26 /* for GEN6 */
+# define VE0_VALID				(1 << 25) /* for GEN6 */
+# define VE0_FORMAT_SHIFT			16
+# define VE0_OFFSET_SHIFT			0
+# define VE1_VFCOMPONENT_0_SHIFT		28
+# define VE1_VFCOMPONENT_1_SHIFT		24
+# define VE1_VFCOMPONENT_2_SHIFT		20
+# define VE1_VFCOMPONENT_3_SHIFT		16
+# define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT	0
 
-#define GEN6_3DSTATE_URB			GEN6_3D(3, 0, 0x05)
-/* DW1 */
-# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT		16
-# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT	0
-/* DW2 */
-# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT	8
-# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT		0
+#define GEN6_SVG_CTL				0x7400
+# define GEN6_SVG_CTL_GS_BA			(0 << 8)
+# define GEN6_SVG_CTL_SS_BA			(1 << 8)
+# define GEN6_SVG_CTL_IO_BA			(2 << 8)
+# define GEN6_SVG_CTL_GS_AUB			(3 << 8)
+# define GEN6_SVG_CTL_IO_AUB			(4 << 8)
+# define GEN6_SVG_CTL_SIP			(5 << 8)
 
-#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS	GEN6_3D(3, 0, 0x0d)
-# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC		(1 << 12)
-# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF		(1 << 11)
-# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP	(1 << 10)
+#define GEN6_SVG_RDATA				0x7404
+#define GEN6_SVG_WORK_CTL			0x7408
+#define GEN6_VF_CTL				0x7500
+# define GEN6_VF_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
+# define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID  (0 << 8)
+# define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG  (1 << 8)
+# define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
+# define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX	   (1 << 4)
+# define GEN6_VF_CTL_SKIP_INITIAL_PRIMITIVES	   (1 << 3)
+# define GEN6_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE   (1 << 2)
+# define GEN6_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE	   (1 << 1)
+# define GEN6_VF_CTL_SNAPSHOT_ENABLE	     	   (1 << 0)
 
-#define GEN6_3DSTATE_CC_STATE_POINTERS		GEN6_3D(3, 0, 0x0e)
+#define GEN6_VF_STRG_VAL			0x7504
+#define GEN6_VF_STR_VL_OVR			0x7508
+#define GEN6_VF_VC_OVR				0x750c
+#define GEN6_VF_STR_PSKIP			0x7510
+#define GEN6_VF_MAX_PRIM			0x7514
+#define GEN6_VF_RDATA				0x7518
+#define GEN6_VS_CTL				0x7600
+# define GEN6_VS_CTL_SNAPSHOT_COMPLETE		    (1 << 31)
+# define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_0	    (0 << 8)
+# define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_1	    (1 << 8)
+# define GEN6_VS_CTL_SNAPSHOT_MUX_VALID_COUNT	    (2 << 8)
+# define GEN6_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
+# define GEN6_VS_CTL_SNAPSHOT_ALL_THREADS	    (1 << 2)
+# define GEN6_VS_CTL_THREAD_SNAPSHOT_ENABLE	    (1 << 1)
+# define GEN6_VS_CTL_SNAPSHOT_ENABLE		    (1 << 0)
 
-#define GEN6_3DSTATE_VS				GEN6_3D(3, 0, 0x10)
+#define GEN6_VS_STRG_VAL			0x7604
+#define GEN6_VS_RDATA				0x7608
+#define GEN6_SF_CTL				0x7b00
+# define GEN6_SF_CTL_SNAPSHOT_COMPLETE			(1 << 31)
+# define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID	(0 << 8)
+# define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT	(1 << 8)
+# define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID	(2 << 8)
+# define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT	(3 << 8)
+# define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID	(4 << 8)
+# define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT	(5 << 8)
+# define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT		(6 << 8)
+# define GEN6_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER	(7 << 8)
+# define GEN6_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE	(1 << 4)
+# define GEN6_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE	(1 << 3)
+# define GEN6_SF_CTL_SNAPSHOT_ALL_THREADS		(1 << 2)
+# define GEN6_SF_CTL_THREAD_SNAPSHOT_ENABLE		(1 << 1)
+# define GEN6_SF_CTL_SNAPSHOT_ENABLE			(1 << 0)
 
-#define GEN6_3DSTATE_GS				GEN6_3D(3, 0, 0x11)
-/* DW4 */
-# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT	0
+#define GEN6_SF_STRG_VAL			0x7b04
+#define GEN6_SF_RDATA				0x7b18
+#define GEN6_WIZ_CTL				0x7c00
+# define GEN6_WIZ_CTL_SNAPSHOT_COMPLETE			(1 << 31)
+# define GEN6_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT		16
+# define GEN6_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER   (0 << 8)
+# define GEN6_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE     (1 << 8)
+# define GEN6_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE   (2 << 8)
+# define GEN6_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH		(1 << 6)
+# define GEN6_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS    (1 << 5)
+# define GEN6_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE   (1 << 4)
+# define GEN6_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG	(1 << 3)
+# define GEN6_WIZ_CTL_SNAPSHOT_ALL_THREADS		(1 << 2)
+# define GEN6_WIZ_CTL_THREAD_SNAPSHOT_ENABLE		(1 << 1)
+# define GEN6_WIZ_CTL_SNAPSHOT_ENABLE			(1 << 0)
 
-#define GEN6_3DSTATE_CLIP			GEN6_3D(3, 0, 0x12)
+#define GEN6_WIZ_STRG_VAL			0x7c04
+#define GEN6_WIZ_RDATA				0x7c18
+#define GEN6_TS_CTL				0x7e00
+# define GEN6_TS_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
+# define GEN6_TS_CTL_SNAPSHOT_MESSAGE_ERROR	   (0 << 8)
+# define GEN6_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
+# define GEN6_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS	   (1 << 2)
+# define GEN6_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS	   (1 << 1)
+# define GEN6_TS_CTL_SNAPSHOT_ENABLE		   (1 << 0)
 
-#define GEN6_3DSTATE_SF				GEN6_3D(3, 0, 0x13)
-/* DW1 */
-# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT		22
-# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT	11
-# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT	4
-/* DW2 */
-/* DW3 */
-# define GEN6_3DSTATE_SF_CULL_BOTH			(0 << 29)
-# define GEN6_3DSTATE_SF_CULL_NONE			(1 << 29)
-# define GEN6_3DSTATE_SF_CULL_FRONT			(2 << 29)
-# define GEN6_3DSTATE_SF_CULL_BACK			(3 << 29)
-/* DW4 */
-# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT		29
-# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT		27
-# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT		25
+#define GEN6_TS_STRG_VAL			0x7e04
+#define GEN6_TS_RDATA				0x7e08
 
+# define GEN6_TD_CTL_MUX_SHIFT				8
+# define GEN6_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH	(1 << 7)
+# define GEN6_TD_CTL_FORCE_EXTERNAL_HALT		(1 << 6)
+# define GEN6_TD_CTL_EXCEPTION_MASK_OVERRIDE		(1 << 5)
+# define GEN6_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE	(1 << 4)
+# define GEN6_TD_CTL_BREAKPOINT_ENABLE			(1 << 2)
 
-#define GEN6_3DSTATE_WM				GEN6_3D(3, 0, 0x14)
-/* DW2 */
-# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT			27
-# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
-/* DW4 */
-# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT		16
-/* DW5 */
-# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT			25
-# define GEN6_3DSTATE_WM_DISPATCH_ENABLE			(1 << 19)
-# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE			(1 << 1)
-# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE			(1 << 0)
-/* DW6 */
-# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT			20
-# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC	(1 << 15)
-# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 14)
-# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC	(1 << 13)
-# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC		(1 << 12)
-# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 11)
-# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC		(1 << 10)
+# define GEN6_TD_CTL2				0x8004
+# define GEN6_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE	    (1 << 28)
+# define GEN6_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE	    (1 << 26)
+# define GEN6_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE	    (1 << 25)
+# define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT		    16
+# define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE	    (1 << 8)
+# define GEN6_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE  (1 << 7)
+# define GEN6_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE		    (1 << 6)
+# define GEN6_TD_CTL2_SF_EXECUTION_MASK_ENABLE		    (1 << 5)
+# define GEN6_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE	    (1 << 4)
+# define GEN6_TD_CTL2_GS_EXECUTION_MASK_ENABLE		    (1 << 3)
+# define GEN6_TD_CTL2_VS_EXECUTION_MASK_ENABLE		    (1 << 0)
 
+#define GEN6_TD_VF_VS_EMSK			0x8008
+#define GEN6_TD_GS_EMSK				0x800c
+#define GEN6_TD_CLIP_EMSK			0x8010
+#define GEN6_TD_SF_EMSK				0x8014
+#define GEN6_TD_WIZ_EMSK			0x8018
+#define GEN6_TD_0_6_EHTRG_VAL			0x801c
+#define GEN6_TD_0_7_EHTRG_VAL			0x8020
+#define GEN6_TD_0_6_EHTRG_MSK			0x8024
+#define GEN6_TD_0_7_EHTRG_MSK			0x8028
+#define GEN6_TD_RDATA				0x802c
+#define GEN6_TD_TS_EMSK				0x8030
+#define GEN6_EU_CTL				0x8800
+# define GEN6_EU_CTL_SELECT_SHIFT		16
+# define GEN6_EU_CTL_DATA_MUX_SHIFT		8
 
-#define GEN6_3DSTATE_CONSTANT_VS		GEN6_3D(3, 0, 0x15)
-#define GEN6_3DSTATE_CONSTANT_GS          	GEN6_3D(3, 0, 0x16)
-#define GEN6_3DSTATE_CONSTANT_PS          	GEN6_3D(3, 0, 0x17)
+#define GEN6_EU_ATT_0				0x8810
+#define GEN6_EU_ATT_1				0x8814
+#define GEN6_EU_ATT_DATA_0			0x8820
+#define GEN6_EU_ATT_DATA_1			0x8824
+#define GEN6_EU_ATT_CLR_0			0x8830
+#define GEN6_EU_ATT_CLR_1			0x8834
+#define GEN6_EU_RDATA				0x8840
 
-#define GEN6_3DSTATE_SAMPLE_MASK		GEN6_3D(3, 0, 0x18)
-
-#define GEN6_3DSTATE_MULTISAMPLE		GEN6_3D(3, 1, 0x0d)
-/* DW1 */
-# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER		(0 << 4)
-# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT	(1 << 4)
-# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1			(0 << 1)
-# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4			(2 << 1)
-# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8			(3 << 1)
-
-#define PIPELINE_SELECT_3D		0
-#define PIPELINE_SELECT_MEDIA		1
-
-#define UF0_CS_REALLOC			(1 << 13)
-#define UF0_VFE_REALLOC			(1 << 12)
-#define UF0_SF_REALLOC			(1 << 11)
-#define UF0_CLIP_REALLOC		(1 << 10)
-#define UF0_GS_REALLOC			(1 << 9)
-#define UF0_VS_REALLOC			(1 << 8)
-#define UF1_CLIP_FENCE_SHIFT		20
-#define UF1_GS_FENCE_SHIFT		10
-#define UF1_VS_FENCE_SHIFT		0
-#define UF2_CS_FENCE_SHIFT		20
-#define UF2_VFE_FENCE_SHIFT		10
-#define UF2_SF_FENCE_SHIFT		0
-
-/* for GEN6_STATE_BASE_ADDRESS */
-#define BASE_ADDRESS_MODIFY		(1 << 0)
-#define BUFFER_SIZE_MODIFY		(1 << 0)
+#define UF0_CS_REALLOC				(1 << 13)
+#define UF0_VFE_REALLOC				(1 << 12)
+#define UF0_SF_REALLOC				(1 << 11)
+#define UF0_CLIP_REALLOC			(1 << 10)
+#define UF0_GS_REALLOC				(1 << 9)
+#define UF0_VS_REALLOC				(1 << 8)
+#define UF1_CLIP_FENCE_SHIFT			20
+#define UF1_GS_FENCE_SHIFT			10
+#define UF1_VS_FENCE_SHIFT			0
+#define UF2_CS_FENCE_SHIFT			20
+#define UF2_VFE_FENCE_SHIFT			10
+#define UF2_SF_FENCE_SHIFT			0
 
 /* for GEN6_3DSTATE_PIPELINED_POINTERS */
-#define GEN6_GS_DISABLE		       0
-#define GEN6_GS_ENABLE		       1
-#define GEN6_CLIP_DISABLE	       0
-#define GEN6_CLIP_ENABLE		       1
+#define GEN6_GS_DISABLE				0
+#define GEN6_GS_ENABLE				1
+#define GEN6_CLIP_DISABLE			0
+#define GEN6_CLIP_ENABLE			1
 
-/* for GEN6_PIPE_CONTROL */
-#define GEN6_PIPE_CONTROL_NOWRITE       (0 << 14)
-#define GEN6_PIPE_CONTROL_WRITE_QWORD   (1 << 14)
-#define GEN6_PIPE_CONTROL_WRITE_DEPTH   (2 << 14)
-#define GEN6_PIPE_CONTROL_WRITE_TIME    (3 << 14)
-#define GEN6_PIPE_CONTROL_DEPTH_STALL   (1 << 13)
-#define GEN6_PIPE_CONTROL_WC_FLUSH      (1 << 12)
-#define GEN6_PIPE_CONTROL_IS_FLUSH      (1 << 11)
-#define GEN6_PIPE_CONTROL_TC_FLUSH      (1 << 10)
-#define GEN6_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
-#define GEN6_PIPE_CONTROL_GLOBAL_GTT    (1 << 2)
-#define GEN6_PIPE_CONTROL_LOCAL_PGTT    (0 << 2)
-#define GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH	(1 << 0)
+/* 3D state */
+#define _3DOP_3DSTATE_PIPELINED			0x0
+#define _3DOP_3DSTATE_NONPIPELINED		0x1
+#define _3DOP_3DCONTROL				0x2
+#define _3DOP_3DPRIMITIVE			0x3
 
-/* 3DPRIMITIVE bits */
-#define GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
-#define GEN6_3DPRIMITIVE_VERTEX_RANDOM	  (1 << 15)
-/* Primitive types are in gen6_defines.h */
-#define GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT	  10
+#define _3DSTATE_PIPELINED_POINTERS		0x00
+#define _3DSTATE_BINDING_TABLE_POINTERS		0x01
+#define _3DSTATE_VERTEX_BUFFERS			0x08
+#define _3DSTATE_VERTEX_ELEMENTS		0x09
+#define _3DSTATE_INDEX_BUFFER			0x0A
+#define _3DSTATE_VF_STATISTICS			0x0B
+#define _3DSTATE_DRAWING_RECTANGLE		0x00
+#define _3DSTATE_CONSTANT_COLOR			0x01
+#define _3DSTATE_SAMPLER_PALETTE_LOAD		0x02
+#define _3DSTATE_CHROMA_KEY			0x04
+#define _3DSTATE_DEPTH_BUFFER			0x05
+#define _3DSTATE_POLY_STIPPLE_OFFSET		0x06
+#define _3DSTATE_POLY_STIPPLE_PATTERN		0x07
+#define _3DSTATE_LINE_STIPPLE			0x08
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP	0x09
+#define _3DCONTROL				0x00
+#define _3DPRIMITIVE				0x00
 
-#define GEN6_SVG_CTL		       0x7400
+#define _3DPRIM_POINTLIST			0x01
+#define _3DPRIM_LINELIST			0x02
+#define _3DPRIM_LINESTRIP			0x03
+#define _3DPRIM_TRILIST				0x04
+#define _3DPRIM_TRISTRIP			0x05
+#define _3DPRIM_TRIFAN				0x06
+#define _3DPRIM_QUADLIST			0x07
+#define _3DPRIM_QUADSTRIP			0x08
+#define _3DPRIM_LINELIST_ADJ			0x09
+#define _3DPRIM_LINESTRIP_ADJ			0x0A
+#define _3DPRIM_TRILIST_ADJ			0x0B
+#define _3DPRIM_TRISTRIP_ADJ			0x0C
+#define _3DPRIM_TRISTRIP_REVERSE		0x0D
+#define _3DPRIM_POLYGON				0x0E
+#define _3DPRIM_RECTLIST			0x0F
+#define _3DPRIM_LINELOOP			0x10
+#define _3DPRIM_POINTLIST_BF			0x11
+#define _3DPRIM_LINESTRIP_CONT			0x12
+#define _3DPRIM_LINESTRIP_BF			0x13
+#define _3DPRIM_LINESTRIP_CONT_BF		0x14
+#define _3DPRIM_TRIFAN_NOSTIPPLE		0x15
 
-#define GEN6_SVG_CTL_GS_BA	       (0 << 8)
-#define GEN6_SVG_CTL_SS_BA	       (1 << 8)
-#define GEN6_SVG_CTL_IO_BA	       (2 << 8)
-#define GEN6_SVG_CTL_GS_AUB	       (3 << 8)
-#define GEN6_SVG_CTL_IO_AUB	       (4 << 8)
-#define GEN6_SVG_CTL_SIP		       (5 << 8)
+#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL	0
+#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM	1
 
-#define GEN6_SVG_RDATA		       0x7404
-#define GEN6_SVG_WORK_CTL	       0x7408
+#define GEN6_ANISORATIO_2			0
+#define GEN6_ANISORATIO_4			1
+#define GEN6_ANISORATIO_6			2
+#define GEN6_ANISORATIO_8			3
+#define GEN6_ANISORATIO_10			4
+#define GEN6_ANISORATIO_12			5
+#define GEN6_ANISORATIO_14			6
+#define GEN6_ANISORATIO_16			7
 
-#define GEN6_VF_CTL		       0x7500
+#define GEN6_BLENDFACTOR_ONE			0x1
+#define GEN6_BLENDFACTOR_SRC_COLOR		0x2
+#define GEN6_BLENDFACTOR_SRC_ALPHA		0x3
+#define GEN6_BLENDFACTOR_DST_ALPHA		0x4
+#define GEN6_BLENDFACTOR_DST_COLOR		0x5
+#define GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE	0x6
+#define GEN6_BLENDFACTOR_CONST_COLOR		0x7
+#define GEN6_BLENDFACTOR_CONST_ALPHA		0x8
+#define GEN6_BLENDFACTOR_SRC1_COLOR		0x9
+#define GEN6_BLENDFACTOR_SRC1_ALPHA		0x0A
+#define GEN6_BLENDFACTOR_ZERO			0x11
+#define GEN6_BLENDFACTOR_INV_SRC_COLOR		0x12
+#define GEN6_BLENDFACTOR_INV_SRC_ALPHA		0x13
+#define GEN6_BLENDFACTOR_INV_DST_ALPHA		0x14
+#define GEN6_BLENDFACTOR_INV_DST_COLOR		0x15
+#define GEN6_BLENDFACTOR_INV_CONST_COLOR	0x17
+#define GEN6_BLENDFACTOR_INV_CONST_ALPHA	0x18
+#define GEN6_BLENDFACTOR_INV_SRC1_COLOR		0x19
+#define GEN6_BLENDFACTOR_INV_SRC1_ALPHA		0x1A
 
-#define GEN6_VF_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
-#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID	   (0 << 8)
-#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG	   (1 << 8)
-#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE   (0 << 4)
-#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX	   (1 << 4)
-#define GEN6_VF_CTL_SKIP_INITIAL_PRIMITIVES	   (1 << 3)
-#define GEN6_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE	   (1 << 2)
-#define GEN6_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE	   (1 << 1)
-#define GEN6_VF_CTL_SNAPSHOT_ENABLE	     	   (1 << 0)
+#define GEN6_BLENDFUNCTION_ADD			0
+#define GEN6_BLENDFUNCTION_SUBTRACT		1
+#define GEN6_BLENDFUNCTION_REVERSE_SUBTRACT	2
+#define GEN6_BLENDFUNCTION_MIN			3
+#define GEN6_BLENDFUNCTION_MAX			4
 
-#define GEN6_VF_STRG_VAL		       0x7504
-#define GEN6_VF_STR_VL_OVR	       0x7508
-#define GEN6_VF_VC_OVR		       0x750c
-#define GEN6_VF_STR_PSKIP	       0x7510
-#define GEN6_VF_MAX_PRIM		       0x7514
-#define GEN6_VF_RDATA		       0x7518
+#define GEN6_ALPHATEST_FORMAT_UNORM8		0
+#define GEN6_ALPHATEST_FORMAT_FLOAT32		1
 
-#define GEN6_VS_CTL		       0x7600
-#define GEN6_VS_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
-#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_0	   (0 << 8)
-#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_1	   (1 << 8)
-#define GEN6_VS_CTL_SNAPSHOT_MUX_VALID_COUNT	   (2 << 8)
-#define GEN6_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER  (3 << 8)
-#define GEN6_VS_CTL_SNAPSHOT_ALL_THREADS		   (1 << 2)
-#define GEN6_VS_CTL_THREAD_SNAPSHOT_ENABLE	   (1 << 1)
-#define GEN6_VS_CTL_SNAPSHOT_ENABLE		   (1 << 0)
+#define GEN6_CHROMAKEY_KILL_ON_ANY_MATCH	0
+#define GEN6_CHROMAKEY_REPLACE_BLACK		1
 
-#define GEN6_VS_STRG_VAL		       0x7604
-#define GEN6_VS_RDATA		       0x7608
+#define GEN6_CLIP_API_OGL			0
+#define GEN6_CLIP_API_DX			1
 
-#define GEN6_SF_CTL		       0x7b00
-#define GEN6_SF_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID	   (0 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID	   (2 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID	   (4 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT	   (6 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER  (7 << 8)
-#define GEN6_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE  (1 << 4)
-#define GEN6_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE	   (1 << 3)
-#define GEN6_SF_CTL_SNAPSHOT_ALL_THREADS		   (1 << 2)
-#define GEN6_SF_CTL_THREAD_SNAPSHOT_ENABLE	   (1 << 1)
-#define GEN6_SF_CTL_SNAPSHOT_ENABLE		   (1 << 0)
+#define GEN6_CLIPMODE_NORMAL			0
+#define GEN6_CLIPMODE_CLIP_ALL			1
+#define GEN6_CLIPMODE_CLIP_NON_REJECTED		2
+#define GEN6_CLIPMODE_REJECT_ALL		3
+#define GEN6_CLIPMODE_ACCEPT_ALL		4
 
-#define GEN6_SF_STRG_VAL		       0x7b04
-#define GEN6_SF_RDATA		       0x7b18
+#define GEN6_CLIP_NDCSPACE			0
+#define GEN6_CLIP_SCREENSPACE			1
 
-#define GEN6_WIZ_CTL		       0x7c00
-#define GEN6_WIZ_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
-#define GEN6_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT	   16
-#define GEN6_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER   (0 << 8)
-#define GEN6_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE     (1 << 8)
-#define GEN6_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE   (2 << 8)
-#define GEN6_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH	      (1 << 6)
-#define GEN6_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS    (1 << 5)
-#define GEN6_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE   (1 << 4)
-#define GEN6_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG	      (1 << 3)
-#define GEN6_WIZ_CTL_SNAPSHOT_ALL_THREADS	      (1 << 2)
-#define GEN6_WIZ_CTL_THREAD_SNAPSHOT_ENABLE	      (1 << 1)
-#define GEN6_WIZ_CTL_SNAPSHOT_ENABLE		      (1 << 0)
+#define GEN6_COMPAREFUNCTION_ALWAYS		0
+#define GEN6_COMPAREFUNCTION_NEVER		1
+#define GEN6_COMPAREFUNCTION_LESS		2
+#define GEN6_COMPAREFUNCTION_EQUAL		3
+#define GEN6_COMPAREFUNCTION_LEQUAL		4
+#define GEN6_COMPAREFUNCTION_GREATER		5
+#define GEN6_COMPAREFUNCTION_NOTEQUAL		6
+#define GEN6_COMPAREFUNCTION_GEQUAL		7
 
-#define GEN6_WIZ_STRG_VAL			      0x7c04
-#define GEN6_WIZ_RDATA				      0x7c18
+#define GEN6_COVERAGE_PIXELS_HALF		0
+#define GEN6_COVERAGE_PIXELS_1			1
+#define GEN6_COVERAGE_PIXELS_2			2
+#define GEN6_COVERAGE_PIXELS_4			3
 
-#define GEN6_TS_CTL		       0x7e00
-#define GEN6_TS_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
-#define GEN6_TS_CTL_SNAPSHOT_MESSAGE_ERROR	   (0 << 8)
-#define GEN6_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR   (3 << 8)
-#define GEN6_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS	   (1 << 2)
-#define GEN6_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS  	   (1 << 1)
-#define GEN6_TS_CTL_SNAPSHOT_ENABLE		   (1 << 0)
+#define GEN6_CULLMODE_BOTH			0
+#define GEN6_CULLMODE_NONE			1
+#define GEN6_CULLMODE_FRONT			2
+#define GEN6_CULLMODE_BACK			3
 
-#define GEN6_TS_STRG_VAL		       0x7e04
-#define GEN6_TS_RDATA		       0x7e08
+#define GEN6_DEFAULTCOLOR_R8G8B8A8_UNORM	0
+#define GEN6_DEFAULTCOLOR_R32G32B32A32_FLOAT	1
 
-#define GEN6_TD_CTL_MUX_SHIFT	       8
-#define GEN6_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH	   (1 << 7)
-#define GEN6_TD_CTL_FORCE_EXTERNAL_HALT		   (1 << 6)
-#define GEN6_TD_CTL_EXCEPTION_MASK_OVERRIDE	   (1 << 5)
-#define GEN6_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE  (1 << 4)
-#define GEN6_TD_CTL_BREAKPOINT_ENABLE		   (1 << 2)
-#define GEN6_TD_CTL2		       0x8004
-#define GEN6_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
-#define GEN6_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE      (1 << 26)
-#define GEN6_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE	      (1 << 25)
-#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT	      16
-#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE	      (1 << 8)
-#define GEN6_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
-#define GEN6_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE	      (1 << 6)
-#define GEN6_TD_CTL2_SF_EXECUTION_MASK_ENABLE	      (1 << 5)
-#define GEN6_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE     (1 << 4)
-#define GEN6_TD_CTL2_GS_EXECUTION_MASK_ENABLE	      (1 << 3)
-#define GEN6_TD_CTL2_VS_EXECUTION_MASK_ENABLE	      (1 << 0)
-#define GEN6_TD_VF_VS_EMSK	       0x8008
-#define GEN6_TD_GS_EMSK		       0x800c
-#define GEN6_TD_CLIP_EMSK	       0x8010
-#define GEN6_TD_SF_EMSK		       0x8014
-#define GEN6_TD_WIZ_EMSK		       0x8018
-#define GEN6_TD_0_6_EHTRG_VAL	       0x801c
-#define GEN6_TD_0_7_EHTRG_VAL	       0x8020
-#define GEN6_TD_0_6_EHTRG_MSK           0x8024
-#define GEN6_TD_0_7_EHTRG_MSK	       0x8028
-#define GEN6_TD_RDATA		       0x802c
-#define GEN6_TD_TS_EMSK		       0x8030
+#define GEN6_DEPTHFORMAT_D32_FLOAT_S8X24_UINT	0
+#define GEN6_DEPTHFORMAT_D32_FLOAT		1
+#define GEN6_DEPTHFORMAT_D24_UNORM_S8_UINT	2
+#define GEN6_DEPTHFORMAT_D16_UNORM		5
 
-#define GEN6_EU_CTL		       0x8800
-#define GEN6_EU_CTL_SELECT_SHIFT	       16
-#define GEN6_EU_CTL_DATA_MUX_SHIFT      8
-#define GEN6_EU_ATT_0		       0x8810
-#define GEN6_EU_ATT_1		       0x8814
-#define GEN6_EU_ATT_DATA_0	       0x8820
-#define GEN6_EU_ATT_DATA_1	       0x8824
-#define GEN6_EU_ATT_CLR_0	       0x8830
-#define GEN6_EU_ATT_CLR_1	       0x8834
-#define GEN6_EU_RDATA		       0x8840
+#define GEN6_FLOATING_POINT_IEEE_754		0
+#define GEN6_FLOATING_POINT_NON_IEEE_754	1
 
-/* 3D state:
- */
-#define _3DOP_3DSTATE_PIPELINED       0x0
-#define _3DOP_3DSTATE_NONPIPELINED    0x1
-#define _3DOP_3DCONTROL               0x2
-#define _3DOP_3DPRIMITIVE             0x3
+#define GEN6_FRONTWINDING_CW			0
+#define GEN6_FRONTWINDING_CCW			1
 
-#define _3DSTATE_PIPELINED_POINTERS       0x00
-#define _3DSTATE_BINDING_TABLE_POINTERS   0x01
-#define _3DSTATE_VERTEX_BUFFERS           0x08
-#define _3DSTATE_VERTEX_ELEMENTS          0x09
-#define _3DSTATE_INDEX_BUFFER             0x0A
-#define _3DSTATE_VF_STATISTICS            0x0B
-#define _3DSTATE_DRAWING_RECTANGLE            0x00
-#define _3DSTATE_CONSTANT_COLOR               0x01
-#define _3DSTATE_SAMPLER_PALETTE_LOAD         0x02
-#define _3DSTATE_CHROMA_KEY                   0x04
-#define _3DSTATE_DEPTH_BUFFER                 0x05
-#define _3DSTATE_POLY_STIPPLE_OFFSET          0x06
-#define _3DSTATE_POLY_STIPPLE_PATTERN         0x07
-#define _3DSTATE_LINE_STIPPLE                 0x08
-#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP    0x09
-#define _3DCONTROL    0x00
-#define _3DPRIMITIVE  0x00
+#define GEN6_INDEX_BYTE				0
+#define GEN6_INDEX_WORD				1
+#define GEN6_INDEX_DWORD			2
 
-#define _3DPRIM_POINTLIST         0x01
-#define _3DPRIM_LINELIST          0x02
-#define _3DPRIM_LINESTRIP         0x03
-#define _3DPRIM_TRILIST           0x04
-#define _3DPRIM_TRISTRIP          0x05
-#define _3DPRIM_TRIFAN            0x06
-#define _3DPRIM_QUADLIST          0x07
-#define _3DPRIM_QUADSTRIP         0x08
-#define _3DPRIM_LINELIST_ADJ      0x09
-#define _3DPRIM_LINESTRIP_ADJ     0x0A
-#define _3DPRIM_TRILIST_ADJ       0x0B
-#define _3DPRIM_TRISTRIP_ADJ      0x0C
-#define _3DPRIM_TRISTRIP_REVERSE  0x0D
-#define _3DPRIM_POLYGON           0x0E
-#define _3DPRIM_RECTLIST          0x0F
-#define _3DPRIM_LINELOOP          0x10
-#define _3DPRIM_POINTLIST_BF      0x11
-#define _3DPRIM_LINESTRIP_CONT    0x12
-#define _3DPRIM_LINESTRIP_BF      0x13
-#define _3DPRIM_LINESTRIP_CONT_BF 0x14
-#define _3DPRIM_TRIFAN_NOSTIPPLE  0x15
+#define GEN6_LOGICOPFUNCTION_CLEAR		0
+#define GEN6_LOGICOPFUNCTION_NOR		1
+#define GEN6_LOGICOPFUNCTION_AND_INVERTED	2
+#define GEN6_LOGICOPFUNCTION_COPY_INVERTED	3
+#define GEN6_LOGICOPFUNCTION_AND_REVERSE	4
+#define GEN6_LOGICOPFUNCTION_INVERT		5
+#define GEN6_LOGICOPFUNCTION_XOR		6
+#define GEN6_LOGICOPFUNCTION_NAND		7
+#define GEN6_LOGICOPFUNCTION_AND		8
+#define GEN6_LOGICOPFUNCTION_EQUIV		9
+#define GEN6_LOGICOPFUNCTION_NOOP		10
+#define GEN6_LOGICOPFUNCTION_OR_INVERTED	11
+#define GEN6_LOGICOPFUNCTION_COPY		12
+#define GEN6_LOGICOPFUNCTION_OR_REVERSE		13
+#define GEN6_LOGICOPFUNCTION_OR			14
+#define GEN6_LOGICOPFUNCTION_SET		15
 
-#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
-#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     1
+#define GEN6_MAPFILTER_NEAREST			0x0
+#define GEN6_MAPFILTER_LINEAR			0x1
+#define GEN6_MAPFILTER_ANISOTROPIC		0x2
 
-#define GEN6_ANISORATIO_2     0 
-#define GEN6_ANISORATIO_4     1 
-#define GEN6_ANISORATIO_6     2 
-#define GEN6_ANISORATIO_8     3 
-#define GEN6_ANISORATIO_10    4 
-#define GEN6_ANISORATIO_12    5 
-#define GEN6_ANISORATIO_14    6 
-#define GEN6_ANISORATIO_16    7
+#define GEN6_MIPFILTER_NONE			0
+#define GEN6_MIPFILTER_NEAREST			1
+#define GEN6_MIPFILTER_LINEAR			3
 
-#define GEN6_BLENDFACTOR_ONE                 0x1
-#define GEN6_BLENDFACTOR_SRC_COLOR           0x2
-#define GEN6_BLENDFACTOR_SRC_ALPHA           0x3
-#define GEN6_BLENDFACTOR_DST_ALPHA           0x4
-#define GEN6_BLENDFACTOR_DST_COLOR           0x5
-#define GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE  0x6
-#define GEN6_BLENDFACTOR_CONST_COLOR         0x7
-#define GEN6_BLENDFACTOR_CONST_ALPHA         0x8
-#define GEN6_BLENDFACTOR_SRC1_COLOR          0x9
-#define GEN6_BLENDFACTOR_SRC1_ALPHA          0x0A
-#define GEN6_BLENDFACTOR_ZERO                0x11
-#define GEN6_BLENDFACTOR_INV_SRC_COLOR       0x12
-#define GEN6_BLENDFACTOR_INV_SRC_ALPHA       0x13
-#define GEN6_BLENDFACTOR_INV_DST_ALPHA       0x14
-#define GEN6_BLENDFACTOR_INV_DST_COLOR       0x15
-#define GEN6_BLENDFACTOR_INV_CONST_COLOR     0x17
-#define GEN6_BLENDFACTOR_INV_CONST_ALPHA     0x18
-#define GEN6_BLENDFACTOR_INV_SRC1_COLOR      0x19
-#define GEN6_BLENDFACTOR_INV_SRC1_ALPHA      0x1A
+#define GEN6_POLYGON_FRONT_FACING		0
+#define GEN6_POLYGON_BACK_FACING		1
 
-#define GEN6_BLENDFUNCTION_ADD               0
-#define GEN6_BLENDFUNCTION_SUBTRACT          1
-#define GEN6_BLENDFUNCTION_REVERSE_SUBTRACT  2
-#define GEN6_BLENDFUNCTION_MIN               3
-#define GEN6_BLENDFUNCTION_MAX               4
+#define GEN6_PREFILTER_ALWAYS			0x0
+#define GEN6_PREFILTER_NEVER			0x1
+#define GEN6_PREFILTER_LESS			0x2
+#define GEN6_PREFILTER_EQUAL			0x3
+#define GEN6_PREFILTER_LEQUAL			0x4
+#define GEN6_PREFILTER_GREATER			0x5
+#define GEN6_PREFILTER_NOTEQUAL			0x6
+#define GEN6_PREFILTER_GEQUAL			0x7
 
-#define GEN6_ALPHATEST_FORMAT_UNORM8         0
-#define GEN6_ALPHATEST_FORMAT_FLOAT32        1
+#define GEN6_PROVOKING_VERTEX_0			0
+#define GEN6_PROVOKING_VERTEX_1			1
+#define GEN6_PROVOKING_VERTEX_2			2
 
-#define GEN6_CHROMAKEY_KILL_ON_ANY_MATCH  0
-#define GEN6_CHROMAKEY_REPLACE_BLACK      1
+#define GEN6_RASTRULE_UPPER_LEFT		0
+#define GEN6_RASTRULE_UPPER_RIGHT		1
 
-#define GEN6_CLIP_API_OGL     0
-#define GEN6_CLIP_API_DX      1
+#define GEN6_RENDERTARGET_CLAMPRANGE_UNORM	0
+#define GEN6_RENDERTARGET_CLAMPRANGE_SNORM	1
+#define GEN6_RENDERTARGET_CLAMPRANGE_FORMAT	2
 
-#define GEN6_CLIPMODE_NORMAL              0
-#define GEN6_CLIPMODE_CLIP_ALL            1
-#define GEN6_CLIPMODE_CLIP_NON_REJECTED   2
-#define GEN6_CLIPMODE_REJECT_ALL          3
-#define GEN6_CLIPMODE_ACCEPT_ALL          4
+#define GEN6_STENCILOP_KEEP			0
+#define GEN6_STENCILOP_ZERO			1
+#define GEN6_STENCILOP_REPLACE			2
+#define GEN6_STENCILOP_INCRSAT			3
+#define GEN6_STENCILOP_DECRSAT			4
+#define GEN6_STENCILOP_INCR			5
+#define GEN6_STENCILOP_DECR			6
+#define GEN6_STENCILOP_INVERT			7
 
-#define GEN6_CLIP_NDCSPACE     0
-#define GEN6_CLIP_SCREENSPACE  1
-
-#define GEN6_COMPAREFUNCTION_ALWAYS       0
-#define GEN6_COMPAREFUNCTION_NEVER        1
-#define GEN6_COMPAREFUNCTION_LESS         2
-#define GEN6_COMPAREFUNCTION_EQUAL        3
-#define GEN6_COMPAREFUNCTION_LEQUAL       4
-#define GEN6_COMPAREFUNCTION_GREATER      5
-#define GEN6_COMPAREFUNCTION_NOTEQUAL     6
-#define GEN6_COMPAREFUNCTION_GEQUAL       7
-
-#define GEN6_COVERAGE_PIXELS_HALF     0
-#define GEN6_COVERAGE_PIXELS_1        1
-#define GEN6_COVERAGE_PIXELS_2        2
-#define GEN6_COVERAGE_PIXELS_4        3
-
-#define GEN6_CULLMODE_BOTH        0
-#define GEN6_CULLMODE_NONE        1
-#define GEN6_CULLMODE_FRONT       2
-#define GEN6_CULLMODE_BACK        3
-
-#define GEN6_DEFAULTCOLOR_R8G8B8A8_UNORM      0
-#define GEN6_DEFAULTCOLOR_R32G32B32A32_FLOAT  1
-
-#define GEN6_DEPTHFORMAT_D32_FLOAT_S8X24_UINT     0
-#define GEN6_DEPTHFORMAT_D32_FLOAT                1
-#define GEN6_DEPTHFORMAT_D24_UNORM_S8_UINT        2
-#define GEN6_DEPTHFORMAT_D16_UNORM                5
-
-#define GEN6_FLOATING_POINT_IEEE_754        0
-#define GEN6_FLOATING_POINT_NON_IEEE_754    1
-
-#define GEN6_FRONTWINDING_CW      0
-#define GEN6_FRONTWINDING_CCW     1
-
-#define GEN6_INDEX_BYTE     0
-#define GEN6_INDEX_WORD     1
-#define GEN6_INDEX_DWORD    2
-
-#define GEN6_LOGICOPFUNCTION_CLEAR            0
-#define GEN6_LOGICOPFUNCTION_NOR              1
-#define GEN6_LOGICOPFUNCTION_AND_INVERTED     2
-#define GEN6_LOGICOPFUNCTION_COPY_INVERTED    3
-#define GEN6_LOGICOPFUNCTION_AND_REVERSE      4
-#define GEN6_LOGICOPFUNCTION_INVERT           5
-#define GEN6_LOGICOPFUNCTION_XOR              6
-#define GEN6_LOGICOPFUNCTION_NAND             7
-#define GEN6_LOGICOPFUNCTION_AND              8
-#define GEN6_LOGICOPFUNCTION_EQUIV            9
-#define GEN6_LOGICOPFUNCTION_NOOP             10
-#define GEN6_LOGICOPFUNCTION_OR_INVERTED      11
-#define GEN6_LOGICOPFUNCTION_COPY             12
-#define GEN6_LOGICOPFUNCTION_OR_REVERSE       13
-#define GEN6_LOGICOPFUNCTION_OR               14
-#define GEN6_LOGICOPFUNCTION_SET              15  
-
-#define GEN6_MAPFILTER_NEAREST        0x0 
-#define GEN6_MAPFILTER_LINEAR         0x1 
-#define GEN6_MAPFILTER_ANISOTROPIC    0x2
-
-#define GEN6_MIPFILTER_NONE        0   
-#define GEN6_MIPFILTER_NEAREST     1   
-#define GEN6_MIPFILTER_LINEAR      3
-
-#define GEN6_POLYGON_FRONT_FACING     0
-#define GEN6_POLYGON_BACK_FACING      1
-
-#define GEN6_PREFILTER_ALWAYS     0x0 
-#define GEN6_PREFILTER_NEVER      0x1
-#define GEN6_PREFILTER_LESS       0x2
-#define GEN6_PREFILTER_EQUAL      0x3
-#define GEN6_PREFILTER_LEQUAL     0x4
-#define GEN6_PREFILTER_GREATER    0x5
-#define GEN6_PREFILTER_NOTEQUAL   0x6
-#define GEN6_PREFILTER_GEQUAL     0x7
-
-#define GEN6_PROVOKING_VERTEX_0    0
-#define GEN6_PROVOKING_VERTEX_1    1 
-#define GEN6_PROVOKING_VERTEX_2    2
-
-#define GEN6_RASTRULE_UPPER_LEFT  0    
-#define GEN6_RASTRULE_UPPER_RIGHT 1
-
-#define GEN6_RENDERTARGET_CLAMPRANGE_UNORM    0
-#define GEN6_RENDERTARGET_CLAMPRANGE_SNORM    1
-#define GEN6_RENDERTARGET_CLAMPRANGE_FORMAT   2
-
-#define GEN6_STENCILOP_KEEP               0
-#define GEN6_STENCILOP_ZERO               1
-#define GEN6_STENCILOP_REPLACE            2
-#define GEN6_STENCILOP_INCRSAT            3
-#define GEN6_STENCILOP_DECRSAT            4
-#define GEN6_STENCILOP_INCR               5
-#define GEN6_STENCILOP_DECR               6
-#define GEN6_STENCILOP_INVERT             7
-
-#define GEN6_SURFACE_MIPMAPLAYOUT_BELOW   0
-#define GEN6_SURFACE_MIPMAPLAYOUT_RIGHT   1
+#define GEN6_SURFACE_MIPMAPLAYOUT_BELOW		0
+#define GEN6_SURFACE_MIPMAPLAYOUT_RIGHT		1
 
 #define GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT             0x000
 #define GEN6_SURFACEFORMAT_R32G32B32A32_SINT              0x001
@@ -926,258 +664,254 @@
 #define GEN6_SURFACEFORMAT_R16G16B16_SSCALED              0x19E
 #define GEN6_SURFACEFORMAT_R16G16B16_USCALED              0x19F
 
-#define GEN6_SURFACERETURNFORMAT_FLOAT32  0
-#define GEN6_SURFACERETURNFORMAT_S1       1
+#define GEN6_SURFACERETURNFORMAT_FLOAT32	0
+#define GEN6_SURFACERETURNFORMAT_S1		1
 
-#define GEN6_SURFACE_1D      0
-#define GEN6_SURFACE_2D      1
-#define GEN6_SURFACE_3D      2
-#define GEN6_SURFACE_CUBE    3
-#define GEN6_SURFACE_BUFFER  4
-#define GEN6_SURFACE_NULL    7
+#define GEN6_SURFACE_1D				0
+#define GEN6_SURFACE_2D				1
+#define GEN6_SURFACE_3D				2
+#define GEN6_SURFACE_CUBE			3
+#define GEN6_SURFACE_BUFFER			4
+#define GEN6_SURFACE_NULL			7
 
-#define GEN6_BORDER_COLOR_MODE_DEFAULT	0
-#define GEN6_BORDER_COLOR_MODE_LEGACY	1
+#define GEN6_BORDER_COLOR_MODE_DEFAULT		0
+#define GEN6_BORDER_COLOR_MODE_LEGACY		1
 
-#define GEN6_TEXCOORDMODE_WRAP            0
-#define GEN6_TEXCOORDMODE_MIRROR          1
-#define GEN6_TEXCOORDMODE_CLAMP           2
-#define GEN6_TEXCOORDMODE_CUBE            3
-#define GEN6_TEXCOORDMODE_CLAMP_BORDER    4
-#define GEN6_TEXCOORDMODE_MIRROR_ONCE     5
+#define GEN6_TEXCOORDMODE_WRAP			0
+#define GEN6_TEXCOORDMODE_MIRROR		1
+#define GEN6_TEXCOORDMODE_CLAMP			2
+#define GEN6_TEXCOORDMODE_CUBE			3
+#define GEN6_TEXCOORDMODE_CLAMP_BORDER		4
+#define GEN6_TEXCOORDMODE_MIRROR_ONCE		5
 
-#define GEN6_THREAD_PRIORITY_NORMAL   0
-#define GEN6_THREAD_PRIORITY_HIGH     1
+#define GEN6_THREAD_PRIORITY_NORMAL		0
+#define GEN6_THREAD_PRIORITY_HIGH		1
 
-#define GEN6_TILEWALK_XMAJOR                 0
-#define GEN6_TILEWALK_YMAJOR                 1
+#define GEN6_TILEWALK_XMAJOR			0
+#define GEN6_TILEWALK_YMAJOR			1
 
-#define GEN6_VERTEX_SUBPIXEL_PRECISION_8BITS  0
-#define GEN6_VERTEX_SUBPIXEL_PRECISION_4BITS  1
+#define GEN6_VERTEX_SUBPIXEL_PRECISION_8BITS	0
+#define GEN6_VERTEX_SUBPIXEL_PRECISION_4BITS	1
 
 #define GEN6_VERTEXBUFFER_ACCESS_VERTEXDATA     0
 #define GEN6_VERTEXBUFFER_ACCESS_INSTANCEDATA   1
 
-#define GEN6_VFCOMPONENT_NOSTORE      0
-#define GEN6_VFCOMPONENT_STORE_SRC    1
-#define GEN6_VFCOMPONENT_STORE_0      2
-#define GEN6_VFCOMPONENT_STORE_1_FLT  3
-#define GEN6_VFCOMPONENT_STORE_1_INT  4
-#define GEN6_VFCOMPONENT_STORE_VID    5
-#define GEN6_VFCOMPONENT_STORE_IID    6
-#define GEN6_VFCOMPONENT_STORE_PID    7
-
-
+#define GEN6_VFCOMPONENT_NOSTORE		0
+#define GEN6_VFCOMPONENT_STORE_SRC		1
+#define GEN6_VFCOMPONENT_STORE_0		2
+#define GEN6_VFCOMPONENT_STORE_1_FLT		3
+#define GEN6_VFCOMPONENT_STORE_1_INT		4
+#define GEN6_VFCOMPONENT_STORE_VID		5
+#define GEN6_VFCOMPONENT_STORE_IID		6
+#define GEN6_VFCOMPONENT_STORE_PID		7
 
 /* Execution Unit (EU) defines
  */
 
-#define GEN6_ALIGN_1   0
-#define GEN6_ALIGN_16  1
+#define GEN6_ALIGN_1				0
+#define GEN6_ALIGN_16				1
 
-#define GEN6_ADDRESS_DIRECT                        0
-#define GEN6_ADDRESS_REGISTER_INDIRECT_REGISTER    1
+#define GEN6_ADDRESS_DIRECT			0
+#define GEN6_ADDRESS_REGISTER_INDIRECT_REGISTER	1
 
-#define GEN6_CHANNEL_X     0
-#define GEN6_CHANNEL_Y     1
-#define GEN6_CHANNEL_Z     2
-#define GEN6_CHANNEL_W     3
+#define GEN6_CHANNEL_X				0
+#define GEN6_CHANNEL_Y				1
+#define GEN6_CHANNEL_Z				2
+#define GEN6_CHANNEL_W				3
 
-#define GEN6_COMPRESSION_NONE          0
-#define GEN6_COMPRESSION_2NDHALF       1
-#define GEN6_COMPRESSION_COMPRESSED    2
+#define GEN6_COMPRESSION_NONE			0
+#define GEN6_COMPRESSION_2NDHALF		1
+#define GEN6_COMPRESSION_COMPRESSED		2
 
-#define GEN6_CONDITIONAL_NONE  0
-#define GEN6_CONDITIONAL_Z     1
-#define GEN6_CONDITIONAL_NZ    2
-#define GEN6_CONDITIONAL_EQ    1	/* Z */
-#define GEN6_CONDITIONAL_NEQ   2	/* NZ */
-#define GEN6_CONDITIONAL_G     3
-#define GEN6_CONDITIONAL_GE    4
-#define GEN6_CONDITIONAL_L     5
-#define GEN6_CONDITIONAL_LE    6
-#define GEN6_CONDITIONAL_C     7
-#define GEN6_CONDITIONAL_O     8
+#define GEN6_CONDITIONAL_NONE			0
+#define GEN6_CONDITIONAL_Z			1
+#define GEN6_CONDITIONAL_NZ			2
+#define GEN6_CONDITIONAL_EQ			1	/* Z */
+#define GEN6_CONDITIONAL_NEQ			2	/* NZ */
+#define GEN6_CONDITIONAL_G			3
+#define GEN6_CONDITIONAL_GE			4
+#define GEN6_CONDITIONAL_L			5
+#define GEN6_CONDITIONAL_LE			6
+#define GEN6_CONDITIONAL_C			7
+#define GEN6_CONDITIONAL_O			8
 
-#define GEN6_DEBUG_NONE        0
-#define GEN6_DEBUG_BREAKPOINT  1
+#define GEN6_DEBUG_NONE				0
+#define GEN6_DEBUG_BREAKPOINT			1
 
-#define GEN6_DEPENDENCY_NORMAL         0
-#define GEN6_DEPENDENCY_NOTCLEARED     1
-#define GEN6_DEPENDENCY_NOTCHECKED     2
-#define GEN6_DEPENDENCY_DISABLE        3
+#define GEN6_DEPENDENCY_NORMAL			0
+#define GEN6_DEPENDENCY_NOTCLEARED		1
+#define GEN6_DEPENDENCY_NOTCHECKED		2
+#define GEN6_DEPENDENCY_DISABLE			3
 
-#define GEN6_EXECUTE_1     0
-#define GEN6_EXECUTE_2     1
-#define GEN6_EXECUTE_4     2
-#define GEN6_EXECUTE_8     3
-#define GEN6_EXECUTE_16    4
-#define GEN6_EXECUTE_32    5
+#define GEN6_EXECUTE_1				0
+#define GEN6_EXECUTE_2				1
+#define GEN6_EXECUTE_4				2
+#define GEN6_EXECUTE_8				3
+#define GEN6_EXECUTE_16				4
+#define GEN6_EXECUTE_32				5
 
-#define GEN6_HORIZONTAL_STRIDE_0   0
-#define GEN6_HORIZONTAL_STRIDE_1   1
-#define GEN6_HORIZONTAL_STRIDE_2   2
-#define GEN6_HORIZONTAL_STRIDE_4   3
+#define GEN6_HORIZONTAL_STRIDE_0		0
+#define GEN6_HORIZONTAL_STRIDE_1		1
+#define GEN6_HORIZONTAL_STRIDE_2		2
+#define GEN6_HORIZONTAL_STRIDE_4		3
 
-#define GEN6_INSTRUCTION_NORMAL    0
-#define GEN6_INSTRUCTION_SATURATE  1
+#define GEN6_INSTRUCTION_NORMAL			0
+#define GEN6_INSTRUCTION_SATURATE		1
 
-#define GEN6_MASK_ENABLE   0
-#define GEN6_MASK_DISABLE  1
+#define GEN6_MASK_ENABLE			0
+#define GEN6_MASK_DISABLE			1
 
-#define GEN6_OPCODE_MOV        1
-#define GEN6_OPCODE_SEL        2
-#define GEN6_OPCODE_NOT        4
-#define GEN6_OPCODE_AND        5
-#define GEN6_OPCODE_OR         6
-#define GEN6_OPCODE_XOR        7
-#define GEN6_OPCODE_SHR        8
-#define GEN6_OPCODE_SHL        9
-#define GEN6_OPCODE_RSR        10
-#define GEN6_OPCODE_RSL        11
-#define GEN6_OPCODE_ASR        12
-#define GEN6_OPCODE_CMP        16
-#define GEN6_OPCODE_JMPI       32
-#define GEN6_OPCODE_IF         34
-#define GEN6_OPCODE_IFF        35
-#define GEN6_OPCODE_ELSE       36
-#define GEN6_OPCODE_ENDIF      37
-#define GEN6_OPCODE_DO         38
-#define GEN6_OPCODE_WHILE      39
-#define GEN6_OPCODE_BREAK      40
-#define GEN6_OPCODE_CONTINUE   41
-#define GEN6_OPCODE_HALT       42
-#define GEN6_OPCODE_MSAVE      44
-#define GEN6_OPCODE_MRESTORE   45
-#define GEN6_OPCODE_PUSH       46
-#define GEN6_OPCODE_POP        47
-#define GEN6_OPCODE_WAIT       48
-#define GEN6_OPCODE_SEND       49
-#define GEN6_OPCODE_ADD        64
-#define GEN6_OPCODE_MUL        65
-#define GEN6_OPCODE_AVG        66
-#define GEN6_OPCODE_FRC        67
-#define GEN6_OPCODE_RNDU       68
-#define GEN6_OPCODE_RNDD       69
-#define GEN6_OPCODE_RNDE       70
-#define GEN6_OPCODE_RNDZ       71
-#define GEN6_OPCODE_MAC        72
-#define GEN6_OPCODE_MACH       73
-#define GEN6_OPCODE_LZD        74
-#define GEN6_OPCODE_SAD2       80
-#define GEN6_OPCODE_SADA2      81
-#define GEN6_OPCODE_DP4        84
-#define GEN6_OPCODE_DPH        85
-#define GEN6_OPCODE_DP3        86
-#define GEN6_OPCODE_DP2        87
-#define GEN6_OPCODE_DPA2       88
-#define GEN6_OPCODE_LINE       89
-#define GEN6_OPCODE_NOP        126
+#define GEN6_OPCODE_MOV				1
+#define GEN6_OPCODE_SEL				2
+#define GEN6_OPCODE_NOT				4
+#define GEN6_OPCODE_AND				5
+#define GEN6_OPCODE_OR				6
+#define GEN6_OPCODE_XOR				7
+#define GEN6_OPCODE_SHR				8
+#define GEN6_OPCODE_SHL				9
+#define GEN6_OPCODE_RSR				10
+#define GEN6_OPCODE_RSL				11
+#define GEN6_OPCODE_ASR				12
+#define GEN6_OPCODE_CMP				16
+#define GEN6_OPCODE_JMPI			32
+#define GEN6_OPCODE_IF				34
+#define GEN6_OPCODE_IFF				35
+#define GEN6_OPCODE_ELSE			36
+#define GEN6_OPCODE_ENDIF			37
+#define GEN6_OPCODE_DO				38
+#define GEN6_OPCODE_WHILE			39
+#define GEN6_OPCODE_BREAK			40
+#define GEN6_OPCODE_CONTINUE			41
+#define GEN6_OPCODE_HALT			42
+#define GEN6_OPCODE_MSAVE			44
+#define GEN6_OPCODE_MRESTORE			45
+#define GEN6_OPCODE_PUSH			46
+#define GEN6_OPCODE_POP				47
+#define GEN6_OPCODE_WAIT			48
+#define GEN6_OPCODE_SEND			49
+#define GEN6_OPCODE_ADD				64
+#define GEN6_OPCODE_MUL				65
+#define GEN6_OPCODE_AVG				66
+#define GEN6_OPCODE_FRC				67
+#define GEN6_OPCODE_RNDU			68
+#define GEN6_OPCODE_RNDD			69
+#define GEN6_OPCODE_RNDE			70
+#define GEN6_OPCODE_RNDZ			71
+#define GEN6_OPCODE_MAC				72
+#define GEN6_OPCODE_MACH			73
+#define GEN6_OPCODE_LZD				74
+#define GEN6_OPCODE_SAD2			80
+#define GEN6_OPCODE_SADA2			81
+#define GEN6_OPCODE_DP4				84
+#define GEN6_OPCODE_DPH				85
+#define GEN6_OPCODE_DP3				86
+#define GEN6_OPCODE_DP2				87
+#define GEN6_OPCODE_DPA2			88
+#define GEN6_OPCODE_LINE			89
+#define GEN6_OPCODE_NOP				126
 
-#define GEN6_PREDICATE_NONE             0
-#define GEN6_PREDICATE_NORMAL           1
-#define GEN6_PREDICATE_ALIGN1_ANYV             2
-#define GEN6_PREDICATE_ALIGN1_ALLV             3
-#define GEN6_PREDICATE_ALIGN1_ANY2H            4
-#define GEN6_PREDICATE_ALIGN1_ALL2H            5
-#define GEN6_PREDICATE_ALIGN1_ANY4H            6
-#define GEN6_PREDICATE_ALIGN1_ALL4H            7
-#define GEN6_PREDICATE_ALIGN1_ANY8H            8
-#define GEN6_PREDICATE_ALIGN1_ALL8H            9
-#define GEN6_PREDICATE_ALIGN1_ANY16H           10
-#define GEN6_PREDICATE_ALIGN1_ALL16H           11
-#define GEN6_PREDICATE_ALIGN16_REPLICATE_X     2
-#define GEN6_PREDICATE_ALIGN16_REPLICATE_Y     3
-#define GEN6_PREDICATE_ALIGN16_REPLICATE_Z     4
-#define GEN6_PREDICATE_ALIGN16_REPLICATE_W     5
-#define GEN6_PREDICATE_ALIGN16_ANY4H           6
-#define GEN6_PREDICATE_ALIGN16_ALL4H           7
+#define GEN6_PREDICATE_NONE			0
+#define GEN6_PREDICATE_NORMAL			1
+#define GEN6_PREDICATE_ALIGN1_ANYV		2
+#define GEN6_PREDICATE_ALIGN1_ALLV		3
+#define GEN6_PREDICATE_ALIGN1_ANY2H		4
+#define GEN6_PREDICATE_ALIGN1_ALL2H		5
+#define GEN6_PREDICATE_ALIGN1_ANY4H		6
+#define GEN6_PREDICATE_ALIGN1_ALL4H		7
+#define GEN6_PREDICATE_ALIGN1_ANY8H		8
+#define GEN6_PREDICATE_ALIGN1_ALL8H		9
+#define GEN6_PREDICATE_ALIGN1_ANY16H		10
+#define GEN6_PREDICATE_ALIGN1_ALL16H		11
+#define GEN6_PREDICATE_ALIGN16_REPLICATE_X	2
+#define GEN6_PREDICATE_ALIGN16_REPLICATE_Y	3
+#define GEN6_PREDICATE_ALIGN16_REPLICATE_Z	4
+#define GEN6_PREDICATE_ALIGN16_REPLICATE_W	5
+#define GEN6_PREDICATE_ALIGN16_ANY4H		6
+#define GEN6_PREDICATE_ALIGN16_ALL4H		7
 
-#define GEN6_ARCHITECTURE_REGISTER_FILE    0
-#define GEN6_GENERAL_REGISTER_FILE         1
-#define GEN6_MESSAGE_REGISTER_FILE         2
-#define GEN6_IMMEDIATE_VALUE               3
+#define GEN6_ARCHITECTURE_REGISTER_FILE		0
+#define GEN6_GENERAL_REGISTER_FILE		1
+#define GEN6_MESSAGE_REGISTER_FILE		2
+#define GEN6_IMMEDIATE_VALUE			3
 
-#define GEN6_REGISTER_TYPE_UD  0
-#define GEN6_REGISTER_TYPE_D   1
-#define GEN6_REGISTER_TYPE_UW  2
-#define GEN6_REGISTER_TYPE_W   3
-#define GEN6_REGISTER_TYPE_UB  4
-#define GEN6_REGISTER_TYPE_B   5
-#define GEN6_REGISTER_TYPE_VF  5	/* packed float vector, immediates only? */
-#define GEN6_REGISTER_TYPE_HF  6
-#define GEN6_REGISTER_TYPE_V   6	/* packed int vector, immediates only, uword dest only */
-#define GEN6_REGISTER_TYPE_F   7
+#define GEN6_REGISTER_TYPE_UD			0
+#define GEN6_REGISTER_TYPE_D			1
+#define GEN6_REGISTER_TYPE_UW			2
+#define GEN6_REGISTER_TYPE_W			3
+#define GEN6_REGISTER_TYPE_UB			4
+#define GEN6_REGISTER_TYPE_B			5
+#define GEN6_REGISTER_TYPE_VF			5	/* packed float vector, immediates only? */
+#define GEN6_REGISTER_TYPE_HF			6
+#define GEN6_REGISTER_TYPE_V			6	/* packed int vector, immediates only, uword dest only */
+#define GEN6_REGISTER_TYPE_F			7
 
-#define GEN6_ARF_NULL                  0x00
-#define GEN6_ARF_ADDRESS               0x10
-#define GEN6_ARF_ACCUMULATOR           0x20   
-#define GEN6_ARF_FLAG                  0x30
-#define GEN6_ARF_MASK                  0x40
-#define GEN6_ARF_MASK_STACK            0x50
-#define GEN6_ARF_MASK_STACK_DEPTH      0x60
-#define GEN6_ARF_STATE                 0x70
-#define GEN6_ARF_CONTROL               0x80
-#define GEN6_ARF_NOTIFICATION_COUNT    0x90
-#define GEN6_ARF_IP                    0xA0
+#define GEN6_ARF_NULL				0x00
+#define GEN6_ARF_ADDRESS			0x10
+#define GEN6_ARF_ACCUMULATOR			0x20
+#define GEN6_ARF_FLAG				0x30
+#define GEN6_ARF_MASK				0x40
+#define GEN6_ARF_MASK_STACK			0x50
+#define GEN6_ARF_MASK_STACK_DEPTH		0x60
+#define GEN6_ARF_STATE				0x70
+#define GEN6_ARF_CONTROL			0x80
+#define GEN6_ARF_NOTIFICATION_COUNT		0x90
+#define GEN6_ARF_IP				0xA0
 
-#define GEN6_AMASK   0
-#define GEN6_IMASK   1
-#define GEN6_LMASK   2
-#define GEN6_CMASK   3
+#define GEN6_AMASK				0
+#define GEN6_IMASK				1
+#define GEN6_LMASK				2
+#define GEN6_CMASK				3
 
+#define GEN6_THREAD_NORMAL			0
+#define GEN6_THREAD_ATOMIC			1
+#define GEN6_THREAD_SWITCH			2
 
+#define GEN6_VERTICAL_STRIDE_0			0
+#define GEN6_VERTICAL_STRIDE_1			1
+#define GEN6_VERTICAL_STRIDE_2			2
+#define GEN6_VERTICAL_STRIDE_4			3
+#define GEN6_VERTICAL_STRIDE_8			4
+#define GEN6_VERTICAL_STRIDE_16			5
+#define GEN6_VERTICAL_STRIDE_32			6
+#define GEN6_VERTICAL_STRIDE_64			7
+#define GEN6_VERTICAL_STRIDE_128		8
+#define GEN6_VERTICAL_STRIDE_256		9
+#define GEN6_VERTICAL_STRIDE_ONE_DIMENSIONAL	0xF
 
-#define GEN6_THREAD_NORMAL     0
-#define GEN6_THREAD_ATOMIC     1
-#define GEN6_THREAD_SWITCH     2
+#define GEN6_WIDTH_1				0
+#define GEN6_WIDTH_2				1
+#define GEN6_WIDTH_4				2
+#define GEN6_WIDTH_8				3
+#define GEN6_WIDTH_16				4
 
-#define GEN6_VERTICAL_STRIDE_0                 0
-#define GEN6_VERTICAL_STRIDE_1                 1
-#define GEN6_VERTICAL_STRIDE_2                 2
-#define GEN6_VERTICAL_STRIDE_4                 3
-#define GEN6_VERTICAL_STRIDE_8                 4
-#define GEN6_VERTICAL_STRIDE_16                5
-#define GEN6_VERTICAL_STRIDE_32                6
-#define GEN6_VERTICAL_STRIDE_64                7
-#define GEN6_VERTICAL_STRIDE_128               8
-#define GEN6_VERTICAL_STRIDE_256               9
-#define GEN6_VERTICAL_STRIDE_ONE_DIMENSIONAL   0xF
+#define GEN6_STATELESS_BUFFER_BOUNDARY_1K	0
+#define GEN6_STATELESS_BUFFER_BOUNDARY_2K	1
+#define GEN6_STATELESS_BUFFER_BOUNDARY_4K	2
+#define GEN6_STATELESS_BUFFER_BOUNDARY_8K	3
+#define GEN6_STATELESS_BUFFER_BOUNDARY_16K	4
+#define GEN6_STATELESS_BUFFER_BOUNDARY_32K	5
+#define GEN6_STATELESS_BUFFER_BOUNDARY_64K	6
+#define GEN6_STATELESS_BUFFER_BOUNDARY_128K	7
+#define GEN6_STATELESS_BUFFER_BOUNDARY_256K	8
+#define GEN6_STATELESS_BUFFER_BOUNDARY_512K	9
+#define GEN6_STATELESS_BUFFER_BOUNDARY_1M	10
+#define GEN6_STATELESS_BUFFER_BOUNDARY_2M	11
 
-#define GEN6_WIDTH_1       0
-#define GEN6_WIDTH_2       1
-#define GEN6_WIDTH_4       2
-#define GEN6_WIDTH_8       3
-#define GEN6_WIDTH_16      4
+#define GEN6_POLYGON_FACING_FRONT		0
+#define GEN6_POLYGON_FACING_BACK		1
 
-#define GEN6_STATELESS_BUFFER_BOUNDARY_1K      0
-#define GEN6_STATELESS_BUFFER_BOUNDARY_2K      1
-#define GEN6_STATELESS_BUFFER_BOUNDARY_4K      2
-#define GEN6_STATELESS_BUFFER_BOUNDARY_8K      3
-#define GEN6_STATELESS_BUFFER_BOUNDARY_16K     4
-#define GEN6_STATELESS_BUFFER_BOUNDARY_32K     5
-#define GEN6_STATELESS_BUFFER_BOUNDARY_64K     6
-#define GEN6_STATELESS_BUFFER_BOUNDARY_128K    7
-#define GEN6_STATELESS_BUFFER_BOUNDARY_256K    8
-#define GEN6_STATELESS_BUFFER_BOUNDARY_512K    9
-#define GEN6_STATELESS_BUFFER_BOUNDARY_1M      10
-#define GEN6_STATELESS_BUFFER_BOUNDARY_2M      11
+#define GEN6_MESSAGE_TARGET_NULL		0
+#define GEN6_MESSAGE_TARGET_MATH		1
+#define GEN6_MESSAGE_TARGET_SAMPLER		2
+#define GEN6_MESSAGE_TARGET_GATEWAY		3
+#define GEN6_MESSAGE_TARGET_DATAPORT_READ	4
+#define GEN6_MESSAGE_TARGET_DATAPORT_WRITE	5
+#define GEN6_MESSAGE_TARGET_URB			6
+#define GEN6_MESSAGE_TARGET_THREAD_SPAWNER	7
 
-#define GEN6_POLYGON_FACING_FRONT      0
-#define GEN6_POLYGON_FACING_BACK       1
-
-#define GEN6_MESSAGE_TARGET_NULL               0
-#define GEN6_MESSAGE_TARGET_MATH               1
-#define GEN6_MESSAGE_TARGET_SAMPLER            2
-#define GEN6_MESSAGE_TARGET_GATEWAY            3
-#define GEN6_MESSAGE_TARGET_DATAPORT_READ      4
-#define GEN6_MESSAGE_TARGET_DATAPORT_WRITE     5
-#define GEN6_MESSAGE_TARGET_URB                6
-#define GEN6_MESSAGE_TARGET_THREAD_SPAWNER     7
-
-#define GEN6_SAMPLER_RETURN_FORMAT_FLOAT32     0
-#define GEN6_SAMPLER_RETURN_FORMAT_UINT32      2
-#define GEN6_SAMPLER_RETURN_FORMAT_SINT32      3
+#define GEN6_SAMPLER_RETURN_FORMAT_FLOAT32	0
+#define GEN6_SAMPLER_RETURN_FORMAT_UINT32	2
+#define GEN6_SAMPLER_RETURN_FORMAT_SINT32	3
 
 #define GEN6_SAMPLER_MESSAGE_SIMD8_SAMPLE              0
 #define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE             0
@@ -1196,26 +930,26 @@
 #define GEN6_SAMPLER_MESSAGE_SIMD8_LD                  3
 #define GEN6_SAMPLER_MESSAGE_SIMD16_LD                 3
 
-#define GEN6_DATAPORT_OWORD_BLOCK_1_OWORDLOW   0
-#define GEN6_DATAPORT_OWORD_BLOCK_1_OWORDHIGH  1
-#define GEN6_DATAPORT_OWORD_BLOCK_2_OWORDS     2
-#define GEN6_DATAPORT_OWORD_BLOCK_4_OWORDS     3
-#define GEN6_DATAPORT_OWORD_BLOCK_8_OWORDS     4
+#define GEN6_DATAPORT_OWORD_BLOCK_1_OWORDLOW	0
+#define GEN6_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 	1
+#define GEN6_DATAPORT_OWORD_BLOCK_2_OWORDS	2
+#define GEN6_DATAPORT_OWORD_BLOCK_4_OWORDS	3
+#define GEN6_DATAPORT_OWORD_BLOCK_8_OWORDS	4
 
-#define GEN6_DATAPORT_OWORD_DUAL_BLOCK_1OWORD     0
-#define GEN6_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS    2
+#define GEN6_DATAPORT_OWORD_DUAL_BLOCK_1OWORD	0
+#define GEN6_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS	2
 
-#define GEN6_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS   2
-#define GEN6_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS  3
+#define GEN6_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS	2
+#define GEN6_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS	3
 
-#define GEN6_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ          0
-#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     1
-#define GEN6_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ          2
-#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      3
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ		0
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ	1
+#define GEN6_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ		2
+#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ		3
 
-#define GEN6_DATAPORT_READ_TARGET_DATA_CACHE      0
-#define GEN6_DATAPORT_READ_TARGET_RENDER_CACHE    1
-#define GEN6_DATAPORT_READ_TARGET_SAMPLER_CACHE   2
+#define GEN6_DATAPORT_READ_TARGET_DATA_CACHE	0
+#define GEN6_DATAPORT_READ_TARGET_RENDER_CACHE	1
+#define GEN6_DATAPORT_READ_TARGET_SAMPLER_CACHE	2
 
 #define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE                0
 #define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED     1
@@ -1245,36 +979,36 @@
 #define GEN6_MATH_FUNCTION_INT_DIV_QUOTIENT                 12
 #define GEN6_MATH_FUNCTION_INT_DIV_REMAINDER                13
 
-#define GEN6_MATH_INTEGER_UNSIGNED     0
-#define GEN6_MATH_INTEGER_SIGNED       1
+#define GEN6_MATH_INTEGER_UNSIGNED		0
+#define GEN6_MATH_INTEGER_SIGNED		1
 
-#define GEN6_MATH_PRECISION_FULL        0
-#define GEN6_MATH_PRECISION_PARTIAL     1
+#define GEN6_MATH_PRECISION_FULL		0
+#define GEN6_MATH_PRECISION_PARTIAL		1
 
-#define GEN6_MATH_SATURATE_NONE         0
-#define GEN6_MATH_SATURATE_SATURATE     1
+#define GEN6_MATH_SATURATE_NONE			0
+#define GEN6_MATH_SATURATE_SATURATE		1
 
-#define GEN6_MATH_DATA_VECTOR  0
-#define GEN6_MATH_DATA_SCALAR  1
+#define GEN6_MATH_DATA_VECTOR			0
+#define GEN6_MATH_DATA_SCALAR			1
 
-#define GEN6_URB_OPCODE_WRITE  0
+#define GEN6_URB_OPCODE_WRITE			0
 
-#define GEN6_URB_SWIZZLE_NONE          0
-#define GEN6_URB_SWIZZLE_INTERLEAVE    1
-#define GEN6_URB_SWIZZLE_TRANSPOSE     2
+#define GEN6_URB_SWIZZLE_NONE			0
+#define GEN6_URB_SWIZZLE_INTERLEAVE		1
+#define GEN6_URB_SWIZZLE_TRANSPOSE		2
 
-#define GEN6_SCRATCH_SPACE_SIZE_1K     0
-#define GEN6_SCRATCH_SPACE_SIZE_2K     1
-#define GEN6_SCRATCH_SPACE_SIZE_4K     2
-#define GEN6_SCRATCH_SPACE_SIZE_8K     3
-#define GEN6_SCRATCH_SPACE_SIZE_16K    4
-#define GEN6_SCRATCH_SPACE_SIZE_32K    5
-#define GEN6_SCRATCH_SPACE_SIZE_64K    6
-#define GEN6_SCRATCH_SPACE_SIZE_128K   7
-#define GEN6_SCRATCH_SPACE_SIZE_256K   8
-#define GEN6_SCRATCH_SPACE_SIZE_512K   9
-#define GEN6_SCRATCH_SPACE_SIZE_1M     10
-#define GEN6_SCRATCH_SPACE_SIZE_2M     11
+#define GEN6_SCRATCH_SPACE_SIZE_1K		0
+#define GEN6_SCRATCH_SPACE_SIZE_2K		1
+#define GEN6_SCRATCH_SPACE_SIZE_4K		2
+#define GEN6_SCRATCH_SPACE_SIZE_8K		3
+#define GEN6_SCRATCH_SPACE_SIZE_16K		4
+#define GEN6_SCRATCH_SPACE_SIZE_32K		5
+#define GEN6_SCRATCH_SPACE_SIZE_64K		6
+#define GEN6_SCRATCH_SPACE_SIZE_128K		7
+#define GEN6_SCRATCH_SPACE_SIZE_256K		8
+#define GEN6_SCRATCH_SPACE_SIZE_512K		9
+#define GEN6_SCRATCH_SPACE_SIZE_1M		10
+#define GEN6_SCRATCH_SPACE_SIZE_2M		11
 
 /* The hardware supports two different modes for border color. The
  * default (OpenGL) mode uses floating-point color channels, while the
-- 
2.9.5

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for lib/gen6_render: Refactoring lib
  2018-04-09 15:42 [igt-dev] [PATCH i-g-t] lib/gen6_render: Refactoring lib Lukasz Kalamarz
@ 2018-04-09 16:49 ` Patchwork
  2018-04-09 22:07 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
                   ` (14 subsequent siblings)
  15 siblings, 0 replies; 28+ messages in thread
From: Patchwork @ 2018-04-09 16:49 UTC (permalink / raw)
  To: Lukasz Kalamarz; +Cc: igt-dev

== Series Details ==

Series: lib/gen6_render: Refactoring lib
URL   : https://patchwork.freedesktop.org/series/41379/
State : success

== Summary ==

IGT patchset tested on top of latest successful build
7c474e011548d35df6b80ceed81d3e6ca560c71d tests/perf: fix gen8 small cores whitelist expectation

with latest DRM-Tip kernel build CI_DRM_4037
1cda370ffded drm-tip: 2018y-04m-09d-14h-03m-58s UTC integration manifest

No testlist changes.

---- Possible new issues:

Test gem_exec_gttfill:
        Subgroup basic:
                skip       -> PASS       (fi-pnv-d510)

fi-bdw-5557u     total:285  pass:264  dwarn:0   dfail:0   fail:0   skip:21  time:439s
fi-bdw-gvtdvm    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:443s
fi-blb-e6850     total:285  pass:220  dwarn:1   dfail:0   fail:0   skip:64  time:384s
fi-bsw-n3050     total:285  pass:239  dwarn:0   dfail:0   fail:0   skip:46  time:537s
fi-bwr-2160      total:285  pass:180  dwarn:0   dfail:0   fail:0   skip:105 time:298s
fi-bxt-dsi       total:285  pass:255  dwarn:0   dfail:0   fail:0   skip:30  time:515s
fi-bxt-j4205     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:517s
fi-byt-j1900     total:285  pass:250  dwarn:0   dfail:0   fail:0   skip:35  time:526s
fi-byt-n2820     total:285  pass:246  dwarn:0   dfail:0   fail:0   skip:39  time:514s
fi-cfl-8700k     total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:409s
fi-cfl-s3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:560s
fi-cfl-u         total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:512s
fi-cnl-y3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:588s
fi-elk-e7500     total:285  pass:226  dwarn:0   dfail:0   fail:0   skip:59  time:425s
fi-gdg-551       total:285  pass:176  dwarn:0   dfail:0   fail:1   skip:108 time:316s
fi-glk-1         total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:539s
fi-glk-j4005     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:487s
fi-hsw-4770      total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:405s
fi-ilk-650       total:285  pass:225  dwarn:0   dfail:0   fail:0   skip:60  time:424s
fi-ivb-3520m     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:468s
fi-ivb-3770      total:285  pass:252  dwarn:0   dfail:0   fail:0   skip:33  time:435s
fi-kbl-7500u     total:285  pass:260  dwarn:1   dfail:0   fail:0   skip:24  time:473s
fi-kbl-7567u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:464s
fi-kbl-r         total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:511s
fi-pnv-d510      total:285  pass:220  dwarn:1   dfail:0   fail:0   skip:64  time:646s
fi-skl-6260u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:448s
fi-skl-6600u     total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:532s
fi-skl-6700k2    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:502s
fi-skl-6770hq    total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:499s
fi-skl-guc       total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:432s
fi-skl-gvtdvm    total:285  pass:262  dwarn:0   dfail:0   fail:0   skip:23  time:446s
fi-snb-2520m     total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:580s
fi-snb-2600      total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:403s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1237/issues.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [igt-dev] ✗ Fi.CI.IGT: failure for lib/gen6_render: Refactoring lib
  2018-04-09 15:42 [igt-dev] [PATCH i-g-t] lib/gen6_render: Refactoring lib Lukasz Kalamarz
  2018-04-09 16:49 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
@ 2018-04-09 22:07 ` Patchwork
  2018-04-10 10:35 ` [igt-dev] [PATCH i-g-t] " Katarzyna Dec
                   ` (13 subsequent siblings)
  15 siblings, 0 replies; 28+ messages in thread
From: Patchwork @ 2018-04-09 22:07 UTC (permalink / raw)
  To: Lukasz Kalamarz; +Cc: igt-dev

== Series Details ==

Series: lib/gen6_render: Refactoring lib
URL   : https://patchwork.freedesktop.org/series/41379/
State : failure

== Summary ==

---- Possible new issues:

Test kms_cursor_legacy:
        Subgroup 2x-flip-vs-cursor-legacy:
                pass       -> FAIL       (shard-hsw)
Test kms_frontbuffer_tracking:
        Subgroup fbc-2p-primscrn-cur-indfb-onoff:
                pass       -> DMESG-FAIL (shard-hsw)

---- Known issues:

Test kms_cursor_legacy:
        Subgroup flip-vs-cursor-varying-size:
                pass       -> FAIL       (shard-hsw) fdo#102670
Test kms_flip:
        Subgroup 2x-dpms-vs-vblank-race:
                pass       -> FAIL       (shard-hsw) fdo#103060
        Subgroup 2x-flip-vs-expired-vblank-interruptible:
                fail       -> PASS       (shard-hsw) fdo#102887

fdo#102670 https://bugs.freedesktop.org/show_bug.cgi?id=102670
fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887

shard-apl        total:2680 pass:1835 dwarn:1   dfail:0   fail:7   skip:836 time:12704s
shard-hsw        total:2680 pass:1782 dwarn:1   dfail:1   fail:4   skip:891 time:11473s
Blacklisted hosts:
shard-kbl        total:2680 pass:1948 dwarn:16  dfail:0   fail:7   skip:709 time:9239s
shard-snb        total:2680 pass:1372 dwarn:1   dfail:0   fail:11  skip:1296 time:6858s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1237/shards.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] lib/gen6_render: Refactoring lib
  2018-04-09 15:42 [igt-dev] [PATCH i-g-t] lib/gen6_render: Refactoring lib Lukasz Kalamarz
  2018-04-09 16:49 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
  2018-04-09 22:07 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
@ 2018-04-10 10:35 ` Katarzyna Dec
  2018-04-10 11:29   ` Kalamarz, Lukasz
  2018-04-10 10:48 ` [igt-dev] [PATCH i-g-t v4 0/4] Refactoring of *_fill libraries Katarzyna Dec
                   ` (12 subsequent siblings)
  15 siblings, 1 reply; 28+ messages in thread
From: Katarzyna Dec @ 2018-04-10 10:35 UTC (permalink / raw)
  To: igt-dev

On Mon, Apr 09, 2018 at 05:42:29PM +0200, Lukasz Kalamarz wrote:
> This patch is starting a series of refactoring changes for *render*
> libs. A lot of code in those libraries is copy/pasted and renamed for
> different gen.
> 
> Changes made in this patch:
> - removal of duplicated registers definitions
> - move field definitions above it register definition
> - move definitions of register into ascending order
> - unify spaces between register name and it's address/value
> 
> Signed-off-by: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
> Cc: Katarzyna Dec <katarzyna.dec@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

> +#define GEN6_REGISTER_TYPE_UB			4
> +#define GEN6_REGISTER_TYPE_B			5
> +#define GEN6_REGISTER_TYPE_VF			5	/* packed float vector, immediates only? */
> +#define GEN6_REGISTER_TYPE_HF			6
> +#define GEN6_REGISTER_TYPE_V			6	/* packed int vector, immediates only, uword dest only */
> +#define GEN6_REGISTER_TYPE_F			7
>
Generally looks good. There are a few style issues: overly long lines (like the one above),
mixed tabs and spaces, etc. Use checkpatch to see what needs to be changed.
I think that these changes can be introduced in another patch, so it will be easier
to review.

Kasia
> -- 
> 2.9.5
> 
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [igt-dev] [PATCH i-g-t v4 0/4] Refactoring of *_fill libraries
  2018-04-09 15:42 [igt-dev] [PATCH i-g-t] lib/gen6_render: Refactoring lib Lukasz Kalamarz
                   ` (2 preceding siblings ...)
  2018-04-10 10:35 ` [igt-dev] [PATCH i-g-t] " Katarzyna Dec
@ 2018-04-10 10:48 ` Katarzyna Dec
  2018-04-10 10:48 ` [igt-dev] [PATCH i-g-t v4 1/4] lib: Move common gpgpu/media fill functions to gpu_fill library Katarzyna Dec
                   ` (11 subsequent siblings)
  15 siblings, 0 replies; 28+ messages in thread
From: Katarzyna Dec @ 2018-04-10 10:48 UTC (permalink / raw)
  To: igt-dev

This series removes duplication in the gpgpu_fill and media_fill
libraries. As a first step I moved the gpgpu and media helper functions
to the gpu_fill library. In the second patch I adjusted the code to our
coding style. In the third, non-obvious duplications were removed (e.g. by
adding conditions for future gens to the gen7 functions). The last patch
adds the missing parameters whose absence makes the GPU hang on gen9 and gen9+.

In first version of this series there was a comment about moving
batch_alloc/copy etc. functions to intel_batchbuffer library.
Because there is a lot of code to review already this change will
be introduced in another series (rendercopy, media_fill, gpgpu_fill
and media_spin code is affected by this).

It is possible that more changes around gen*_media.h and media_spin
are needed, but this will be done as a next step.

v2: Removed not obvious duplications. Adjusted code to review comments.
v3: The series needed reorganization because it introduced a bug on ALP
that was hard to find. That is why patch 1 now almost exclusively moves
functions to gpu_fill, removing only exact duplicates (identical functions).
Also applied comments from review.
v4: Added #defines and copyrights to new gpu_fill library. Changed functions
order in gpu_fill library.

Signed-off-by: Katarzyna Dec <katarzyna.dec@intel.com>
Cc: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>


Katarzyna Dec (4):
  lib: Move common gpgpu/media fill functions to gpu_fill library
  lib: Remove duplications in gpu_fill library
  lib/gpgpu_fill: Add missing configuration parameters for gpgpu_fill
  lib: Adjust refactored gpu_fill library to our coding style

 lib/Makefile.sources    |   3 +-
 lib/gpgpu_fill.c        | 600 ++--------------------------------------------
 lib/gpgpu_fill.h        |  12 +-
 lib/gpu_fill.c          | 624 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/gpu_fill.h          | 135 +++++++++++
 lib/intel_batchbuffer.c |   4 +-
 lib/media_fill.h        |  23 +-
 lib/media_fill_gen7.c   | 278 +--------------------
 lib/media_fill_gen8.c   | 305 +----------------------
 lib/media_fill_gen8lp.c | 367 ----------------------------
 lib/media_fill_gen9.c   | 308 +-----------------------
 lib/meson.build         |   2 +-
 12 files changed, 815 insertions(+), 1846 deletions(-)
 create mode 100644 lib/gpu_fill.c
 create mode 100644 lib/gpu_fill.h
 delete mode 100644 lib/media_fill_gen8lp.c

-- 
2.14.3

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [igt-dev] [PATCH i-g-t v4 1/4] lib: Move common gpgpu/media fill functions to gpu_fill library
  2018-04-09 15:42 [igt-dev] [PATCH i-g-t] lib/gen6_render: Refactoring lib Lukasz Kalamarz
                   ` (3 preceding siblings ...)
  2018-04-10 10:48 ` [igt-dev] [PATCH i-g-t v4 0/4] Refactoring of *_fill libraries Katarzyna Dec
@ 2018-04-10 10:48 ` Katarzyna Dec
  2018-04-10 10:48 ` [igt-dev] [PATCH i-g-t v4 2/4] lib: Remove duplications in " Katarzyna Dec
                   ` (10 subsequent siblings)
  15 siblings, 0 replies; 28+ messages in thread
From: Katarzyna Dec @ 2018-04-10 10:48 UTC (permalink / raw)
  To: igt-dev

The gpgpu_fill and media_fill libraries are very similar and many
functions can be shared. I have created the gpu_fill library with
all functions needed for implementing the gpgpu_fill and media_fill
tests for all Gens. For reviewing and debugging purposes this patch
should only move functions from a few libraries into one, removing
functions that are identical for both media and gpgpu.
Places in the code that required more changes:
  Removing gen7_fill_gpgpu_kernel function that is identical to
gen7_fill_media_kernel and introduces conflict with moving
genX_fill_interface_descriptor, which are the same for media and gpgpu.
  Function gen8_fill_media_kernel is not removed in this patch
(although it is identical to the gen7 version), because this patch
should be, as much as possible, pure function movement.
  gen8_fill_interface_descriptor was unified for media and gpgpu
by adding the kernel and its size as parameters (these parameters
were missing in the media gen8, gen8lp and gen9 functions).
  gen8_emit_state_base_address was unified, the one for gpgpu was
configured like it would be using indirect state (while we are
using CURBE). I have checked that media fill version
(OUT_BATCH(0 | BASE_ADDRESS_MODIFY)) works fine on gpgpu gen8 and newer.

v2: Changed code layout. GenX_fill_media_kernel was identical to
genX_fill_gpgpu_kernel so this function was unified to
gen7_fill_kernel. There were 2 very similar functions
gen8_emit_state_base_address for media and gpgpu, where the one
for gpgpu was configured like it would be using indirect state
(while we are using CURBE). I have checked if media fill version
works fine in gpgpu test on Gen8 and unified them.

v3: Made the patch easier to review by moving the changes that unify code
for various gens (which were included in v1) to another patch, leaving only
the most critical code changes.

v4: Added copyrights and #define to gpu_fill.h

Signed-off-by: Katarzyna Dec <katarzyna.dec@intel.com>
Cc: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 lib/Makefile.sources    |   2 +
 lib/gpgpu_fill.c        | 571 +-----------------------------------
 lib/gpu_fill.c          | 758 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/gpu_fill.h          | 167 +++++++++++
 lib/media_fill_gen7.c   | 271 +----------------
 lib/media_fill_gen8.c   | 290 +-----------------
 lib/media_fill_gen8lp.c | 284 +-----------------
 lib/media_fill_gen9.c   | 298 +------------------
 lib/meson.build         |   1 +
 9 files changed, 937 insertions(+), 1705 deletions(-)
 create mode 100644 lib/gpu_fill.c
 create mode 100644 lib/gpu_fill.h

diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index 3d37ef1d..45e65dd7 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -64,6 +64,8 @@ lib_source_list =	 	\
 	media_spin.c		\
 	gpgpu_fill.h		\
 	gpgpu_fill.c		\
+	gpu_fill.h		\
+	gpu_fill.c		\
 	gen7_media.h            \
 	gen8_media.h            \
 	rendercopy_i915.c	\
diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
index 4d98643d..f2765fd6 100644
--- a/lib/gpgpu_fill.c
+++ b/lib/gpgpu_fill.c
@@ -30,10 +30,9 @@
 
 #include "intel_reg.h"
 #include "drmtest.h"
-#include "intel_batchbuffer.h"
-#include "gen7_media.h"
-#include "gen8_media.h"
+
 #include "gpgpu_fill.h"
+#include "gpu_fill.h"
 
 /* shaders/gpgpu/gpgpu_fill.gxa */
 static const uint32_t gen7_gpgpu_kernel[][4] = {
@@ -75,572 +74,6 @@ static const uint32_t gen9_gpgpu_kernel[][4] = {
 	{ 0x07800031, 0x20000a40, 0x06000e00, 0x82000010 },
 };
 
-static uint32_t
-batch_used(struct intel_batchbuffer *batch)
-{
-	return batch->ptr - batch->buffer;
-}
-
-static uint32_t
-batch_align(struct intel_batchbuffer *batch, uint32_t align)
-{
-	uint32_t offset = batch_used(batch);
-	offset = ALIGN(offset, align);
-	batch->ptr = batch->buffer + offset;
-	return offset;
-}
-
-static void *
-batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
-{
-	uint32_t offset = batch_align(batch, align);
-	batch->ptr += size;
-	return memset(batch->buffer + offset, 0, size);
-}
-
-static uint32_t
-batch_offset(struct intel_batchbuffer *batch, void *ptr)
-{
-	return (uint8_t *)ptr - batch->buffer;
-}
-
-static uint32_t
-batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size,
-	   uint32_t align)
-{
-	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
-}
-
-static void
-gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
-{
-	int ret;
-
-	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
-	if (ret == 0)
-		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
-					NULL, 0, 0, 0);
-	igt_assert(ret == 0);
-}
-
-static uint32_t
-gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch, uint8_t color)
-{
-	uint8_t *curbe_buffer;
-	uint32_t offset;
-
-	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
-	offset = batch_offset(batch, curbe_buffer);
-	*curbe_buffer = color;
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_surface_state(struct intel_batchbuffer *batch,
-			struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
-{
-	struct gen7_surface_state *ss;
-	uint32_t write_domain, read_domain, offset;
-	int ret;
-
-	if (is_dst) {
-		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
-	} else {
-		write_domain = 0;
-		read_domain = I915_GEM_DOMAIN_SAMPLER;
-	}
-
-	ss = batch_alloc(batch, sizeof(*ss), 64);
-	offset = batch_offset(batch, ss);
-
-	ss->ss0.surface_type = GEN7_SURFACE_2D;
-	ss->ss0.surface_format = format;
-	ss->ss0.render_cache_read_write = 1;
-
-	if (buf->tiling == I915_TILING_X)
-		ss->ss0.tiled_mode = 2;
-	else if (buf->tiling == I915_TILING_Y)
-		ss->ss0.tiled_mode = 3;
-
-	ss->ss1.base_addr = buf->bo->offset;
-	ret = drm_intel_bo_emit_reloc(batch->bo,
-				batch_offset(batch, ss) + 4,
-				buf->bo, 0,
-				read_domain, write_domain);
-	igt_assert(ret == 0);
-
-	ss->ss2.height = igt_buf_height(buf) - 1;
-	ss->ss2.width  = igt_buf_width(buf) - 1;
-
-	ss->ss3.pitch  = buf->stride - 1;
-
-	ss->ss7.shader_chanel_select_r = 4;
-	ss->ss7.shader_chanel_select_g = 5;
-	ss->ss7.shader_chanel_select_b = 6;
-	ss->ss7.shader_chanel_select_a = 7;
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_surface_state(struct intel_batchbuffer *batch,
-			struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
-{
-	struct gen8_surface_state *ss;
-	uint32_t write_domain, read_domain, offset;
-	int ret;
-
-	if (is_dst) {
-		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
-	} else {
-		write_domain = 0;
-		read_domain = I915_GEM_DOMAIN_SAMPLER;
-	}
-
-	ss = batch_alloc(batch, sizeof(*ss), 64);
-	offset = batch_offset(batch, ss);
-
-	ss->ss0.surface_type = GEN8_SURFACE_2D;
-	ss->ss0.surface_format = format;
-	ss->ss0.render_cache_read_write = 1;
-	ss->ss0.vertical_alignment = 1; /* align 4 */
-	ss->ss0.horizontal_alignment = 1; /* align 4 */
-
-	if (buf->tiling == I915_TILING_X)
-		ss->ss0.tiled_mode = 2;
-	else if (buf->tiling == I915_TILING_Y)
-		ss->ss0.tiled_mode = 3;
-
-	ss->ss8.base_addr = buf->bo->offset;
-
-	ret = drm_intel_bo_emit_reloc(batch->bo,
-				batch_offset(batch, ss) + 8 * 4,
-				buf->bo, 0,
-				read_domain, write_domain);
-	igt_assert_eq(ret, 0);
-
-	ss->ss2.height = igt_buf_height(buf) - 1;
-	ss->ss2.width  = igt_buf_width(buf) - 1;
-	ss->ss3.pitch  = buf->stride - 1;
-
-	ss->ss7.shader_chanel_select_r = 4;
-	ss->ss7.shader_chanel_select_g = 5;
-	ss->ss7.shader_chanel_select_b = 6;
-	ss->ss7.shader_chanel_select_a = 7;
-
-	return offset;
-
-}
-
-static uint32_t
-gen7_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = batch_alloc(batch, 32, 64);
-	offset = batch_offset(batch, binding_table);
-
-	binding_table[0] = gen7_fill_surface_state(batch, dst, GEN7_SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = batch_alloc(batch, 32, 64);
-	offset = batch_offset(batch, binding_table);
-
-	binding_table[0] = gen8_fill_surface_state(batch, dst, GEN8_SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_gpgpu_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size)
-{
-	uint32_t offset;
-
-	offset = batch_copy(batch, kernel, size, 64);
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,
-			       const uint32_t kernel[][4], size_t size)
-{
-	struct gen7_interface_descriptor_data *idd;
-	uint32_t offset;
-	uint32_t binding_table_offset, kernel_offset;
-
-	binding_table_offset = gen7_fill_binding_table(batch, dst);
-	kernel_offset = gen7_fill_gpgpu_kernel(batch, kernel, size);
-
-	idd = batch_alloc(batch, sizeof(*idd), 64);
-	offset = batch_offset(batch, idd);
-
-	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
-
-	idd->desc1.single_program_flow = 1;
-	idd->desc1.floating_point_mode = GEN7_FLOATING_POINT_IEEE_754;
-
-	idd->desc2.sampler_count = 0;      /* 0 samplers used */
-	idd->desc2.sampler_state_pointer = 0;
-
-	idd->desc3.binding_table_entry_count = 0;
-	idd->desc3.binding_table_pointer = (binding_table_offset >> 5);
-
-	idd->desc4.constant_urb_entry_read_offset = 0;
-	idd->desc4.constant_urb_entry_read_length = 1; /* grf 1 */
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,
-			       const uint32_t kernel[][4], size_t size)
-{
-	struct gen8_interface_descriptor_data *idd;
-	uint32_t offset;
-	uint32_t binding_table_offset, kernel_offset;
-
-	binding_table_offset = gen8_fill_binding_table(batch, dst);
-	kernel_offset = gen7_fill_gpgpu_kernel(batch, kernel, size);
-
-	idd = batch_alloc(batch, sizeof(*idd), 64);
-	offset = batch_offset(batch, idd);
-
-	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
-
-	idd->desc2.single_program_flow = 1;
-	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
-
-	idd->desc3.sampler_count = 0;      /* 0 samplers used */
-	idd->desc3.sampler_state_pointer = 0;
-
-	idd->desc4.binding_table_entry_count = 0;
-	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
-
-	idd->desc5.constant_urb_entry_read_offset = 0;
-	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
-
-	return offset;
-}
-
-static void
-gen7_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
-
-	/* general */
-	OUT_BATCH(0);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* general/dynamic/indirect/instruction access Bound */
-	OUT_BATCH(0);
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-}
-
-static void
-gen8_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2));
-
-	/* general */
-	OUT_BATCH(0 | (0x78 << 4) | (0 << 1) |  BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-
-	/* stateless data port */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
-		  0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-	OUT_BATCH(0 );
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* general state buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* dynamic state buffer size */
-	OUT_BATCH(1 << 12 | 1);
-	/* indirect object buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
-	OUT_BATCH(1 << 12 | 1);
-}
-
-static void
-gen9_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (19 - 2));
-
-	/* general */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-
-	/* stateless data port */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
-		0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* general state buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* dynamic state buffer size */
-	OUT_BATCH(1 << 12 | 1);
-	/* indirect object buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
-	OUT_BATCH(1 << 12 | 1);
-
-	/* Bindless surface state base address */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-	OUT_BATCH(0xfffff000);
-}
-
-static void
-gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 | /* max num of threads */
-		  0 << 8 | /* num of URB entry */
-		  1 << 2); /* GPGPU mode */
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(0 << 16 | 	/* URB entry size in 256 bits unit */
-		  1);		/* CURBE entry size in 256 bits unit */
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-static void
-gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 | 1 << 8);
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(0 << 16 | 1);
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-static void
-gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
-{
-	OUT_BATCH(GEN7_MEDIA_CURBE_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* curbe total data length */
-	OUT_BATCH(64);
-	/* curbe data start address, is relative to the dynamics base address */
-	OUT_BATCH(curbe_buffer);
-}
-
-static void
-gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
-	OUT_BATCH(interface_descriptor);
-}
-
-static void
-gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
-	OUT_BATCH(interface_descriptor);
-}
-
-static void
-gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
-		     unsigned x, unsigned y,
-		     unsigned width, unsigned height)
-{
-	uint32_t x_dim, y_dim, tmp, right_mask;
-
-	/*
-	 * Simply do SIMD16 based dispatch, so every thread uses
-	 * SIMD16 channels.
-	 *
-	 * Define our own thread group size, e.g 16x1 for every group, then
-	 * will have 1 thread each group in SIMD16 dispatch. So thread
-	 * width/height/depth are all 1.
-	 *
-	 * Then thread group X = width / 16 (aligned to 16)
-	 * thread group Y = height;
-	 */
-	x_dim = (width + 15) / 16;
-	y_dim = height;
-
-	tmp = width & 15;
-	if (tmp == 0)
-		right_mask = (1 << 16) - 1;
-	else
-		right_mask = (1 << tmp) - 1;
-
-	OUT_BATCH(GEN7_GPGPU_WALKER | 9);
-
-	/* interface descriptor offset */
-	OUT_BATCH(0);
-
-	/* SIMD size, thread w/h/d */
-	OUT_BATCH(1 << 30 | /* SIMD16 */
-		  0 << 16 | /* depth:1 */
-		  0 << 8 | /* height:1 */
-		  0); /* width:1 */
-
-	/* thread group X */
-	OUT_BATCH(0);
-	OUT_BATCH(x_dim);
-
-	/* thread group Y */
-	OUT_BATCH(0);
-	OUT_BATCH(y_dim);
-
-	/* thread group Z */
-	OUT_BATCH(0);
-	OUT_BATCH(1);
-
-	/* right mask */
-	OUT_BATCH(right_mask);
-
-	/* bottom mask, height 1, always 0xffffffff */
-	OUT_BATCH(0xffffffff);
-}
-
-static void
-gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
-		     unsigned x, unsigned y,
-		     unsigned width, unsigned height)
-{
-	uint32_t x_dim, y_dim, tmp, right_mask;
-
-	/*
-	 * Simply do SIMD16 based dispatch, so every thread uses
-	 * SIMD16 channels.
-	 *
-	 * Define our own thread group size, e.g 16x1 for every group, then
-	 * will have 1 thread each group in SIMD16 dispatch. So thread
-	 * width/height/depth are all 1.
-	 *
-	 * Then thread group X = width / 16 (aligned to 16)
-	 * thread group Y = height;
-	 */
-	x_dim = (width + 15) / 16;
-	y_dim = height;
-
-	tmp = width & 15;
-	if (tmp == 0)
-		right_mask = (1 << 16) - 1;
-	else
-		right_mask = (1 << tmp) - 1;
-
-	OUT_BATCH(GEN7_GPGPU_WALKER | 13);
-
-	OUT_BATCH(0); /* kernel offset */
-	OUT_BATCH(0); /* indirect data length */
-	OUT_BATCH(0); /* indirect data offset */
-
-	/* SIMD size, thread w/h/d */
-	OUT_BATCH(1 << 30 | /* SIMD16 */
-		  0 << 16 | /* depth:1 */
-		  0 << 8 | /* height:1 */
-		  0); /* width:1 */
-
-	/* thread group X */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(x_dim);
-
-	/* thread group Y */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(y_dim);
-
-	/* thread group Z */
-	OUT_BATCH(0);
-	OUT_BATCH(1);
-
-	/* right mask */
-	OUT_BATCH(right_mask);
-
-	/* bottom mask, height 1, always 0xffffffff */
-	OUT_BATCH(0xffffffff);
-}
-
 /*
  * This sets up the gpgpu pipeline,
  *
diff --git a/lib/gpu_fill.c b/lib/gpu_fill.c
new file mode 100644
index 00000000..172c6db6
--- /dev/null
+++ b/lib/gpu_fill.c
@@ -0,0 +1,758 @@
+#include "gpu_fill.h"
+
+uint32_t
+batch_used(struct intel_batchbuffer *batch)
+{
+	return batch->ptr - batch->buffer;
+}
+
+uint32_t
+batch_align(struct intel_batchbuffer *batch, uint32_t align)
+{
+	uint32_t offset = batch_used(batch);
+	offset = ALIGN(offset, align);
+	batch->ptr = batch->buffer + offset;
+	return offset;
+}
+
+void *
+batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
+{
+	uint32_t offset = batch_align(batch, align);
+	batch->ptr += size;
+	return memset(batch->buffer + offset, 0, size);
+}
+
+uint32_t
+batch_offset(struct intel_batchbuffer *batch, void *ptr)
+{
+	return (uint8_t *)ptr - batch->buffer;
+}
+
+uint32_t
+batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
+{
+	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
+}
+
+void
+gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
+{
+	int ret;
+
+	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
+	if (ret == 0)
+		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
+					NULL, 0, 0, 0);
+	igt_assert(ret == 0);
+}
+
+uint32_t
+gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
+			uint8_t color)
+{
+	uint8_t *curbe_buffer;
+	uint32_t offset;
+
+	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
+	offset = batch_offset(batch, curbe_buffer);
+	*curbe_buffer = color;
+
+	return offset;
+}
+
+uint32_t
+gen7_fill_surface_state(struct intel_batchbuffer *batch,
+			struct igt_buf *buf,
+			uint32_t format,
+			int is_dst)
+{
+	struct gen7_surface_state *ss;
+	uint32_t write_domain, read_domain, offset;
+	int ret;
+
+	if (is_dst) {
+		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
+	} else {
+		write_domain = 0;
+		read_domain = I915_GEM_DOMAIN_SAMPLER;
+	}
+
+	ss = batch_alloc(batch, sizeof(*ss), 64);
+	offset = batch_offset(batch, ss);
+
+	ss->ss0.surface_type = GEN7_SURFACE_2D;
+	ss->ss0.surface_format = format;
+	ss->ss0.render_cache_read_write = 1;
+
+	if (buf->tiling == I915_TILING_X)
+		ss->ss0.tiled_mode = 2;
+	else if (buf->tiling == I915_TILING_Y)
+		ss->ss0.tiled_mode = 3;
+
+	ss->ss1.base_addr = buf->bo->offset;
+	ret = drm_intel_bo_emit_reloc(batch->bo,
+				batch_offset(batch, ss) + 4,
+				buf->bo, 0,
+				read_domain, write_domain);
+	igt_assert(ret == 0);
+
+	ss->ss2.height = igt_buf_height(buf) - 1;
+	ss->ss2.width  = igt_buf_width(buf) - 1;
+
+	ss->ss3.pitch  = buf->stride - 1;
+
+	ss->ss7.shader_chanel_select_r = 4;
+	ss->ss7.shader_chanel_select_g = 5;
+	ss->ss7.shader_chanel_select_b = 6;
+	ss->ss7.shader_chanel_select_a = 7;
+
+	return offset;
+}
+
+uint32_t
+gen7_fill_binding_table(struct intel_batchbuffer *batch,
+			struct igt_buf *dst)
+{
+	uint32_t *binding_table, offset;
+
+	binding_table = batch_alloc(batch, 32, 64);
+	offset = batch_offset(batch, binding_table);
+	binding_table[0] = gen7_fill_surface_state(batch, dst,
+						GEN7_SURFACEFORMAT_R8_UNORM, 1);
+
+	return offset;
+}
+
+uint32_t
+gen7_fill_media_kernel(struct intel_batchbuffer *batch,
+		const uint32_t kernel[][4],
+		size_t size)
+{
+	uint32_t offset;
+
+	offset = batch_copy(batch, kernel, size, 64);
+
+	return offset;
+}
+
+uint32_t
+gen8_fill_media_kernel(struct intel_batchbuffer *batch,
+		const uint32_t kernel[][4],
+		size_t size)
+{
+	uint32_t offset;
+
+	offset = batch_copy(batch, kernel, size, 64);
+
+	return offset;
+}
+
+uint32_t
+gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,
+			       const uint32_t kernel[][4], size_t size)
+{
+	struct gen7_interface_descriptor_data *idd;
+	uint32_t offset;
+	uint32_t binding_table_offset, kernel_offset;
+
+	binding_table_offset = gen7_fill_binding_table(batch, dst);
+	kernel_offset = gen7_fill_media_kernel(batch, kernel, size);
+
+	idd = batch_alloc(batch, sizeof(*idd), 64);
+	offset = batch_offset(batch, idd);
+
+	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
+
+	idd->desc1.single_program_flow = 1;
+	idd->desc1.floating_point_mode = GEN7_FLOATING_POINT_IEEE_754;
+
+	idd->desc2.sampler_count = 0;      /* 0 samplers used */
+	idd->desc2.sampler_state_pointer = 0;
+
+	idd->desc3.binding_table_entry_count = 0;
+	idd->desc3.binding_table_pointer = (binding_table_offset >> 5);
+
+	idd->desc4.constant_urb_entry_read_offset = 0;
+	idd->desc4.constant_urb_entry_read_length = 1; /* grf 1 */
+
+	return offset;
+}
+
+void
+gen7_emit_state_base_address(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
+
+	/* general */
+	OUT_BATCH(0);
+
+	/* surface */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+
+	/* dynamic */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+
+	/* indirect */
+	OUT_BATCH(0);
+
+	/* instruction */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+
+	/* general/dynamic/indirect/instruction access Bound */
+	OUT_BATCH(0);
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+}
+
+void
+gen7_emit_vfe_state(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
+
+	/* scratch buffer */
+	OUT_BATCH(0);
+
+	/* number of threads & urb entries */
+	OUT_BATCH(1 << 16 |
+		2 << 8);
+
+	OUT_BATCH(0);
+
+	/* urb entry size & curbe size */
+	OUT_BATCH(2 << 16 | 	/* in 256 bits unit */
+		2);		/* in 256 bits unit */
+
+	/* scoreboard */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+void
+gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
+
+	/* scratch buffer */
+	OUT_BATCH(0);
+
+	/* number of threads & urb entries */
+	OUT_BATCH(1 << 16 | /* max num of threads */
+		  0 << 8 | /* num of URB entry */
+		  1 << 2); /* GPGPU mode */
+
+	OUT_BATCH(0);
+
+	/* urb entry size & curbe size */
+	OUT_BATCH(0 << 16 | 	/* URB entry size in 256 bits unit */
+		  1);		/* CURBE entry size in 256 bits unit */
+
+	/* scoreboard */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+void
+gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
+{
+	OUT_BATCH(GEN7_MEDIA_CURBE_LOAD | (4 - 2));
+	OUT_BATCH(0);
+	/* curbe total data length */
+	OUT_BATCH(64);
+	/* curbe data start address, relative to the dynamic state base address */
+	OUT_BATCH(curbe_buffer);
+}
+
+void
+gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
+{
+	OUT_BATCH(GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
+	OUT_BATCH(0);
+	/* interface descriptor data length */
+	OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
+	/* interface descriptor address, relative to the dynamic state base address */
+	OUT_BATCH(interface_descriptor);
+}
+
+void
+gen7_emit_media_objects(struct intel_batchbuffer *batch,
+			unsigned x, unsigned y,
+			unsigned width, unsigned height)
+{
+	int i, j;
+
+	for (i = 0; i < width / 16; i++) {
+		for (j = 0; j < height / 16; j++) {
+			OUT_BATCH(GEN7_MEDIA_OBJECT | (8 - 2));
+
+			/* interface descriptor offset */
+			OUT_BATCH(0);
+
+			/* without indirect data */
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+
+			/* scoreboard */
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+
+			/* inline data (xoffset, yoffset) */
+			OUT_BATCH(x + i * 16);
+			OUT_BATCH(y + j * 16);
+		}
+	}
+}
+
+void
+gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
+		     unsigned x, unsigned y,
+		     unsigned width, unsigned height)
+{
+	uint32_t x_dim, y_dim, tmp, right_mask;
+
+	/*
+	 * Simply do SIMD16 based dispatch, so every thread uses
+	 * SIMD16 channels.
+	 *
+	 * Define our own thread group size, e.g. 16x1 for every group, so
+	 * each group has 1 thread in SIMD16 dispatch. Thus thread
+	 * width/height/depth are all 1.
+	 *
+	 * Then thread group X = width / 16 (rounded up),
+	 * thread group Y = height.
+	 */
+	x_dim = (width + 15) / 16;
+	y_dim = height;
+
+	tmp = width & 15;
+	if (tmp == 0)
+		right_mask = (1 << 16) - 1;
+	else
+		right_mask = (1 << tmp) - 1;
+
+	OUT_BATCH(GEN7_GPGPU_WALKER | 9);
+
+	/* interface descriptor offset */
+	OUT_BATCH(0);
+
+	/* SIMD size, thread w/h/d */
+	OUT_BATCH(1 << 30 | /* SIMD16 */
+		  0 << 16 | /* depth:1 */
+		  0 << 8 | /* height:1 */
+		  0); /* width:1 */
+
+	/* thread group X */
+	OUT_BATCH(0);
+	OUT_BATCH(x_dim);
+
+	/* thread group Y */
+	OUT_BATCH(0);
+	OUT_BATCH(y_dim);
+
+	/* thread group Z */
+	OUT_BATCH(0);
+	OUT_BATCH(1);
+
+	/* right mask */
+	OUT_BATCH(right_mask);
+
+	/* bottom mask, height 1, always 0xffffffff */
+	OUT_BATCH(0xffffffff);
+}
+
+void
+gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
+{
+	int ret;
+
+	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
+	if (ret == 0)
+		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
+					NULL, 0, 0, 0);
+	igt_assert(ret == 0);
+}
+
+
+uint32_t
+gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
+			uint8_t color)
+{
+	uint8_t *curbe_buffer;
+	uint32_t offset;
+
+	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
+	offset = batch_offset(batch, curbe_buffer);
+	*curbe_buffer = color;
+
+	return offset;
+}
+
+uint32_t
+gen8_fill_surface_state(struct intel_batchbuffer *batch,
+			struct igt_buf *buf,
+			uint32_t format,
+			int is_dst)
+{
+	struct gen8_surface_state *ss;
+	uint32_t write_domain, read_domain, offset;
+	int ret;
+
+	if (is_dst) {
+		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
+	} else {
+		write_domain = 0;
+		read_domain = I915_GEM_DOMAIN_SAMPLER;
+	}
+
+	ss = batch_alloc(batch, sizeof(*ss), 64);
+	offset = batch_offset(batch, ss);
+
+	ss->ss0.surface_type = GEN8_SURFACE_2D;
+	ss->ss0.surface_format = format;
+	ss->ss0.render_cache_read_write = 1;
+	ss->ss0.vertical_alignment = 1; /* align 4 */
+	ss->ss0.horizontal_alignment = 1; /* align 4 */
+
+	if (buf->tiling == I915_TILING_X)
+		ss->ss0.tiled_mode = 2;
+	else if (buf->tiling == I915_TILING_Y)
+		ss->ss0.tiled_mode = 3;
+
+	ss->ss8.base_addr = buf->bo->offset;
+
+	ret = drm_intel_bo_emit_reloc(batch->bo,
+				batch_offset(batch, ss) + 8 * 4,
+				buf->bo, 0,
+				read_domain, write_domain);
+	igt_assert(ret == 0);
+
+	ss->ss2.height = igt_buf_height(buf) - 1;
+	ss->ss2.width  = igt_buf_width(buf) - 1;
+	ss->ss3.pitch  = buf->stride - 1;
+
+	ss->ss7.shader_chanel_select_r = 4;
+	ss->ss7.shader_chanel_select_g = 5;
+	ss->ss7.shader_chanel_select_b = 6;
+	ss->ss7.shader_chanel_select_a = 7;
+
+	return offset;
+}
+
+uint32_t
+gen8_fill_binding_table(struct intel_batchbuffer *batch,
+			struct igt_buf *dst)
+{
+	uint32_t *binding_table, offset;
+
+	binding_table = batch_alloc(batch, 32, 64);
+	offset = batch_offset(batch, binding_table);
+
+	binding_table[0] = gen8_fill_surface_state(batch, dst,
+						GEN8_SURFACEFORMAT_R8_UNORM, 1);
+
+	return offset;
+}
+
+uint32_t
+gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,  const uint32_t kernel[][4], size_t size)
+{
+	struct gen8_interface_descriptor_data *idd;
+	uint32_t offset;
+	uint32_t binding_table_offset, kernel_offset;
+
+	binding_table_offset = gen8_fill_binding_table(batch, dst);
+	kernel_offset = gen8_fill_media_kernel(batch, kernel, size);
+
+	idd = batch_alloc(batch, sizeof(*idd), 64);
+	offset = batch_offset(batch, idd);
+
+	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
+
+	idd->desc2.single_program_flow = 1;
+	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
+
+	idd->desc3.sampler_count = 0;      /* 0 samplers used */
+	idd->desc3.sampler_state_pointer = 0;
+
+	idd->desc4.binding_table_entry_count = 0;
+	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
+
+	idd->desc5.constant_urb_entry_read_offset = 0;
+	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
+
+	return offset;
+}
+
+void
+gen8_emit_state_base_address(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2));
+
+	/* general */
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
+
+	/* stateless data port */
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+
+	/* surface */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
+
+	/* dynamic */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
+		0, BASE_ADDRESS_MODIFY);
+
+	/* indirect */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	/* instruction */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+
+	/* general state buffer size */
+	OUT_BATCH(0xfffff000 | 1);
+	/* dynamic state buffer size */
+	OUT_BATCH(1 << 12 | 1);
+	/* indirect object buffer size */
+	OUT_BATCH(0xfffff000 | 1);
+	/* instruction buffer size; the modify enable bit must be set, otherwise the GPU may hang */
+	OUT_BATCH(1 << 12 | 1);
+}
+
+void
+gen8_emit_vfe_state(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
+
+	/* scratch buffer */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	/* number of threads & urb entries */
+	OUT_BATCH(1 << 16 |
+		2 << 8);
+
+	OUT_BATCH(0);
+
+	/* urb entry size & curbe size */
+	OUT_BATCH(2 << 16 |
+		2);
+
+	/* scoreboard */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+void
+gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
+
+	/* scratch buffer */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	/* number of threads & urb entries */
+	OUT_BATCH(1 << 16 | 1 << 8);
+
+	OUT_BATCH(0);
+
+	/* urb entry size & curbe size */
+	OUT_BATCH(0 << 16 | 1);
+
+	/* scoreboard */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+void
+gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
+{
+	OUT_BATCH(GEN8_MEDIA_CURBE_LOAD | (4 - 2));
+	OUT_BATCH(0);
+	/* curbe total data length */
+	OUT_BATCH(64);
+	/* curbe data start address, relative to the dynamic state base address */
+	OUT_BATCH(curbe_buffer);
+}
+
+void
+gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
+{
+	OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
+	OUT_BATCH(0);
+	/* interface descriptor data length */
+	OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
+	/* interface descriptor address, relative to the dynamic state base address */
+	OUT_BATCH(interface_descriptor);
+}
+
+void
+gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_MEDIA_STATE_FLUSH | (2 - 2));
+	OUT_BATCH(0);
+}
+
+void
+gen8_emit_media_objects(struct intel_batchbuffer *batch,
+			unsigned x, unsigned y,
+			unsigned width, unsigned height)
+{
+	int i, j;
+
+	for (i = 0; i < width / 16; i++) {
+		for (j = 0; j < height / 16; j++) {
+			OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
+
+			/* interface descriptor offset */
+			OUT_BATCH(0);
+
+			/* without indirect data */
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+
+			/* scoreboard */
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+
+			/* inline data (xoffset, yoffset) */
+			OUT_BATCH(x + i * 16);
+			OUT_BATCH(y + j * 16);
+			gen8_emit_media_state_flush(batch);
+		}
+	}
+}
+void
+gen8lp_emit_media_objects(struct intel_batchbuffer *batch,
+			unsigned x, unsigned y,
+			unsigned width, unsigned height)
+{
+	int i, j;
+
+	for (i = 0; i < width / 16; i++) {
+		for (j = 0; j < height / 16; j++) {
+			OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
+
+			/* interface descriptor offset */
+			OUT_BATCH(0);
+
+			/* without indirect data */
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+
+			/* scoreboard */
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+
+			/* inline data (xoffset, yoffset) */
+			OUT_BATCH(x + i * 16);
+			OUT_BATCH(y + j * 16);
+		}
+	}
+}
+void
+gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
+		     unsigned x, unsigned y,
+		     unsigned width, unsigned height)
+{
+	uint32_t x_dim, y_dim, tmp, right_mask;
+
+	/*
+	 * Simply do SIMD16 based dispatch, so every thread uses
+	 * SIMD16 channels.
+	 *
+	 * Define our own thread group size, e.g. 16x1 for every group, so
+	 * each group has 1 thread in SIMD16 dispatch. Thus thread
+	 * width/height/depth are all 1.
+	 *
+	 * Then thread group X = width / 16 (rounded up),
+	 * thread group Y = height.
+	 */
+	x_dim = (width + 15) / 16;
+	y_dim = height;
+
+	tmp = width & 15;
+	if (tmp == 0)
+		right_mask = (1 << 16) - 1;
+	else
+		right_mask = (1 << tmp) - 1;
+
+	OUT_BATCH(GEN7_GPGPU_WALKER | 13);
+
+	OUT_BATCH(0); /* kernel offset */
+	OUT_BATCH(0); /* indirect data length */
+	OUT_BATCH(0); /* indirect data offset */
+
+	/* SIMD size, thread w/h/d */
+	OUT_BATCH(1 << 30 | /* SIMD16 */
+		  0 << 16 | /* depth:1 */
+		  0 << 8 | /* height:1 */
+		  0); /* width:1 */
+
+	/* thread group X */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(x_dim);
+
+	/* thread group Y */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(y_dim);
+
+	/* thread group Z */
+	OUT_BATCH(0);
+	OUT_BATCH(1);
+
+	/* right mask */
+	OUT_BATCH(right_mask);
+
+	/* bottom mask, height 1, always 0xffffffff */
+	OUT_BATCH(0xffffffff);
+}
+
+void
+gen9_emit_state_base_address(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (19 - 2));
+
+	/* general */
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
+
+	/* stateless data port */
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+
+	/* surface */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
+
+	/* dynamic */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
+		0, BASE_ADDRESS_MODIFY);
+
+	/* indirect */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	/* instruction */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+
+	/* general state buffer size */
+	OUT_BATCH(0xfffff000 | 1);
+	/* dynamic state buffer size */
+	OUT_BATCH(1 << 12 | 1);
+	/* indirect object buffer size */
+	OUT_BATCH(0xfffff000 | 1);
+	/* instruction buffer size; the modify enable bit must be set, otherwise the GPU may hang */
+	OUT_BATCH(1 << 12 | 1);
+
+	/* Bindless surface state base address */
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
+	OUT_BATCH(0xfffff000);
+}
diff --git a/lib/gpu_fill.h b/lib/gpu_fill.h
new file mode 100644
index 00000000..87e62c86
--- /dev/null
+++ b/lib/gpu_fill.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef GPU_FILL_H
+#define GPU_FILL_H
+
+#include <intel_bufmgr.h>
+#include <i915_drm.h>
+
+#include "media_fill.h"
+#include "gen7_media.h"
+#include "gen8_media.h"
+#include "intel_reg.h"
+#include "drmtest.h"
+#include "intel_batchbuffer.h"
+#include "intel_chipset.h"
+#include <assert.h>
+
+uint32_t
+batch_used(struct intel_batchbuffer *batch);
+
+uint32_t
+batch_align(struct intel_batchbuffer *batch, uint32_t align);
+
+void *
+batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align);
+
+uint32_t
+batch_offset(struct intel_batchbuffer *batch, void *ptr);
+
+uint32_t
+batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align);
+
+void
+gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end);
+
+uint32_t
+gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
+			uint8_t color);
+
+uint32_t
+gen7_fill_surface_state(struct intel_batchbuffer *batch,
+			struct igt_buf *buf,
+			uint32_t format,
+			int is_dst);
+
+uint32_t
+gen7_fill_binding_table(struct intel_batchbuffer *batch,
+			struct igt_buf *dst);
+
+uint32_t
+gen7_fill_media_kernel(struct intel_batchbuffer *batch,
+		const uint32_t kernel[][4],
+		size_t size);
+
+uint32_t
+gen8_fill_media_kernel(struct intel_batchbuffer *batch,
+		const uint32_t kernel[][4],
+		size_t size);
+
+uint32_t
+gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,
+			       const uint32_t kernel[][4], size_t size);
+
+void
+gen7_emit_state_base_address(struct intel_batchbuffer *batch);
+
+void
+gen7_emit_vfe_state(struct intel_batchbuffer *batch);
+
+void
+gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
+
+void
+gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer);
+
+void
+gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor);
+
+void
+gen7_emit_media_objects(struct intel_batchbuffer *batch,
+			unsigned x, unsigned y,
+			unsigned width, unsigned height);
+
+void
+gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
+		     unsigned x, unsigned y,
+		     unsigned width, unsigned height);
+
+void
+gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end);
+
+uint32_t
+gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
+			uint8_t color);
+
+uint32_t
+gen8_fill_surface_state(struct intel_batchbuffer *batch,
+			struct igt_buf *buf,
+			uint32_t format,
+			int is_dst);
+
+uint32_t
+gen8_fill_binding_table(struct intel_batchbuffer *batch,
+			struct igt_buf *dst);
+
+uint32_t
+gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,  const uint32_t kernel[][4], size_t size);
+
+void
+gen8_emit_state_base_address(struct intel_batchbuffer *batch);
+
+void
+gen8_emit_vfe_state(struct intel_batchbuffer *batch);
+
+void
+gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
+
+void
+gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer);
+
+void
+gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor);
+
+void
+gen8_emit_media_state_flush(struct intel_batchbuffer *batch);
+
+void
+gen8_emit_media_objects(struct intel_batchbuffer *batch,
+			unsigned x, unsigned y,
+			unsigned width, unsigned height);
+
+void
+gen8lp_emit_media_objects(struct intel_batchbuffer *batch,
+			unsigned x, unsigned y,
+			unsigned width, unsigned height);
+
+void
+gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
+		     unsigned x, unsigned y,
+		     unsigned width, unsigned height);
+
+void
+gen9_emit_state_base_address(struct intel_batchbuffer *batch);
+
+#endif /* GPU_FILL_H */
diff --git a/lib/media_fill_gen7.c b/lib/media_fill_gen7.c
index 6fb44798..c97555a6 100644
--- a/lib/media_fill_gen7.c
+++ b/lib/media_fill_gen7.c
@@ -5,7 +5,7 @@
 #include "gen7_media.h"
 #include "intel_reg.h"
 #include "drmtest.h"
-
+#include "gpu_fill.h"
 #include <assert.h>
 
 static const uint32_t media_kernel[][4] = {
@@ -22,275 +22,6 @@ static const uint32_t media_kernel[][4] = {
 	{ 0x07800031, 0x20001ca8, 0x00000e00, 0x82000010 },
 };
 
-static uint32_t
-batch_used(struct intel_batchbuffer *batch)
-{
-	return batch->ptr - batch->buffer;
-}
-
-static uint32_t
-batch_align(struct intel_batchbuffer *batch, uint32_t align)
-{
-	uint32_t offset = batch_used(batch);
-	offset = ALIGN(offset, align);
-	batch->ptr = batch->buffer + offset;
-	return offset;
-}
-
-static void *
-batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
-{
-	uint32_t offset = batch_align(batch, align);
-	batch->ptr += size;
-	return memset(batch->buffer + offset, 0, size);
-}
-
-static uint32_t
-batch_offset(struct intel_batchbuffer *batch, void *ptr)
-{
-	return (uint8_t *)ptr - batch->buffer;
-}
-
-static uint32_t
-batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
-{
-	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
-}
-
-static void
-gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
-{
-	int ret;
-
-	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
-	if (ret == 0)
-		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
-					NULL, 0, 0, 0);
-	igt_assert(ret == 0);
-}
-
-static uint32_t
-gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			uint8_t color)
-{
-	uint8_t *curbe_buffer;
-	uint32_t offset;
-
-	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
-	offset = batch_offset(batch, curbe_buffer);
-	*curbe_buffer = color;
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_surface_state(struct intel_batchbuffer *batch,
-			struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
-{
-	struct gen7_surface_state *ss;
-	uint32_t write_domain, read_domain, offset;
-	int ret;
-
-	if (is_dst) {
-		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
-	} else {
-		write_domain = 0;
-		read_domain = I915_GEM_DOMAIN_SAMPLER;
-	}
-
-	ss = batch_alloc(batch, sizeof(*ss), 64);
-	offset = batch_offset(batch, ss);
-
-	ss->ss0.surface_type = GEN7_SURFACE_2D;
-	ss->ss0.surface_format = format;
-	ss->ss0.render_cache_read_write = 1;
-
-	if (buf->tiling == I915_TILING_X)
-		ss->ss0.tiled_mode = 2;
-	else if (buf->tiling == I915_TILING_Y)
-		ss->ss0.tiled_mode = 3;
-
-	ss->ss1.base_addr = buf->bo->offset;
-	ret = drm_intel_bo_emit_reloc(batch->bo,
-				batch_offset(batch, ss) + 4,
-				buf->bo, 0,
-				read_domain, write_domain);
-	igt_assert(ret == 0);
-
-	ss->ss2.height = igt_buf_height(buf) - 1;
-	ss->ss2.width  = igt_buf_width(buf) - 1;
-
-	ss->ss3.pitch  = buf->stride - 1;
-
-	ss->ss7.shader_chanel_select_r = 4;
-	ss->ss7.shader_chanel_select_g = 5;
-	ss->ss7.shader_chanel_select_b = 6;
-	ss->ss7.shader_chanel_select_a = 7;
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = batch_alloc(batch, 32, 64);
-	offset = batch_offset(batch, binding_table);
-
-	binding_table[0] = gen7_fill_surface_state(batch, dst, GEN7_SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_media_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size)
-{
-	uint32_t offset;
-
-	offset = batch_copy(batch, kernel, size, 64);
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,
-			       const uint32_t kernel[][4], size_t size)
-{
-	struct gen7_interface_descriptor_data *idd;
-	uint32_t offset;
-	uint32_t binding_table_offset, kernel_offset;
-
-	binding_table_offset = gen7_fill_binding_table(batch, dst);
-	kernel_offset = gen7_fill_media_kernel(batch, kernel, size);
-
-	idd = batch_alloc(batch, sizeof(*idd), 64);
-	offset = batch_offset(batch, idd);
-
-	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
-
-	idd->desc1.single_program_flow = 1;
-	idd->desc1.floating_point_mode = GEN7_FLOATING_POINT_IEEE_754;
-
-	idd->desc2.sampler_count = 0;      /* 0 samplers used */
-	idd->desc2.sampler_state_pointer = 0;
-
-	idd->desc3.binding_table_entry_count = 0;
-	idd->desc3.binding_table_pointer = (binding_table_offset >> 5);
-
-	idd->desc4.constant_urb_entry_read_offset = 0;
-	idd->desc4.constant_urb_entry_read_length = 1; /* grf 1 */
-
-	return offset;
-}
-
-static void
-gen7_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
-
-	/* general */
-	OUT_BATCH(0);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* general/dynamic/indirect/instruction access Bound */
-	OUT_BATCH(0);
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-}
-
-static void
-gen7_emit_vfe_state(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 |
-		2 << 8);
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(2 << 16 | 	/* in 256 bits unit */
-		2);		/* in 256 bits unit */
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-static void
-gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
-{
-	OUT_BATCH(GEN7_MEDIA_CURBE_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* curbe total data length */
-	OUT_BATCH(64);
-	/* curbe data start address, is relative to the dynamics base address */
-	OUT_BATCH(curbe_buffer);
-}
-
-static void
-gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
-	OUT_BATCH(interface_descriptor);
-}
-
-static void
-gen7_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height)
-{
-	int i, j;
-
-	for (i = 0; i < width / 16; i++) {
-		for (j = 0; j < height / 16; j++) {
-			OUT_BATCH(GEN7_MEDIA_OBJECT | (8 - 2));
-
-			/* interface descriptor offset */
-			OUT_BATCH(0);
-
-			/* without indirect data */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* scoreboard */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* inline data (xoffset, yoffset) */
-			OUT_BATCH(x + i * 16);
-			OUT_BATCH(y + j * 16);
-		}
-	}
-}
-
 /*
  * This sets up the media pipeline,
  *
diff --git a/lib/media_fill_gen8.c b/lib/media_fill_gen8.c
index 4a8fe5a2..4270997e 100644
--- a/lib/media_fill_gen8.c
+++ b/lib/media_fill_gen8.c
@@ -5,7 +5,7 @@
 #include "gen8_media.h"
 #include "intel_reg.h"
 #include "drmtest.h"
-
+#include "gpu_fill.h"
 #include <assert.h>
 
 
@@ -23,293 +23,7 @@ static const uint32_t media_kernel[][4] = {
 	{ 0x07800031, 0x20000a40, 0x0e000e00, 0x82000010 },
 };
 
-static uint32_t
-batch_used(struct intel_batchbuffer *batch)
-{
-	return batch->ptr - batch->buffer;
-}
-
-static uint32_t
-batch_align(struct intel_batchbuffer *batch, uint32_t align)
-{
-	uint32_t offset = batch_used(batch);
-	offset = ALIGN(offset, align);
-	batch->ptr = batch->buffer + offset;
-	return offset;
-}
-
-static void *
-batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
-{
-	uint32_t offset = batch_align(batch, align);
-	batch->ptr += size;
-	return memset(batch->buffer + offset, 0, size);
-}
-
-static uint32_t
-batch_offset(struct intel_batchbuffer *batch, void *ptr)
-{
-	return (uint8_t *)ptr - batch->buffer;
-}
-
-static uint32_t
-batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
-{
-	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
-}
-
-static void
-gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
-{
-	int ret;
-
-	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
-	if (ret == 0)
-		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
-					NULL, 0, 0, 0);
-	igt_assert(ret == 0);
-}
-
-static uint32_t
-gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			uint8_t color)
-{
-	uint8_t *curbe_buffer;
-	uint32_t offset;
-
-	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
-	offset = batch_offset(batch, curbe_buffer);
-	*curbe_buffer = color;
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_surface_state(struct intel_batchbuffer *batch,
-			struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
-{
-	struct gen8_surface_state *ss;
-	uint32_t write_domain, read_domain, offset;
-	int ret;
-
-	if (is_dst) {
-		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
-	} else {
-		write_domain = 0;
-		read_domain = I915_GEM_DOMAIN_SAMPLER;
-	}
-
-	ss = batch_alloc(batch, sizeof(*ss), 64);
-	offset = batch_offset(batch, ss);
-
-	ss->ss0.surface_type = GEN8_SURFACE_2D;
-	ss->ss0.surface_format = format;
-	ss->ss0.render_cache_read_write = 1;
-	ss->ss0.vertical_alignment = 1; /* align 4 */
-	ss->ss0.horizontal_alignment = 1; /* align 4 */
-
-	if (buf->tiling == I915_TILING_X)
-		ss->ss0.tiled_mode = 2;
-	else if (buf->tiling == I915_TILING_Y)
-		ss->ss0.tiled_mode = 3;
-
-	ss->ss8.base_addr = buf->bo->offset;
-
-	ret = drm_intel_bo_emit_reloc(batch->bo,
-				batch_offset(batch, ss) + 8 * 4,
-				buf->bo, 0,
-				read_domain, write_domain);
-	igt_assert(ret == 0);
-
-	ss->ss2.height = igt_buf_height(buf) - 1;
-	ss->ss2.width  = igt_buf_width(buf) - 1;
-	ss->ss3.pitch  = buf->stride - 1;
-
-	ss->ss7.shader_chanel_select_r = 4;
-	ss->ss7.shader_chanel_select_g = 5;
-	ss->ss7.shader_chanel_select_b = 6;
-	ss->ss7.shader_chanel_select_a = 7;
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = batch_alloc(batch, 32, 64);
-	offset = batch_offset(batch, binding_table);
-
-	binding_table[0] = gen8_fill_surface_state(batch, dst, GEN8_SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_media_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size)
-{
-	uint32_t offset;
-
-	offset = batch_copy(batch, kernel, size, 64);
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst)
-{
-	struct gen8_interface_descriptor_data *idd;
-	uint32_t offset;
-	uint32_t binding_table_offset, kernel_offset;
-
-	binding_table_offset = gen8_fill_binding_table(batch, dst);
-	kernel_offset = gen8_fill_media_kernel(batch, media_kernel, sizeof(media_kernel));
-
-	idd = batch_alloc(batch, sizeof(*idd), 64);
-	offset = batch_offset(batch, idd);
-
-	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
-
-	idd->desc2.single_program_flow = 1;
-	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
-
-	idd->desc3.sampler_count = 0;      /* 0 samplers used */
-	idd->desc3.sampler_state_pointer = 0;
-
-	idd->desc4.binding_table_entry_count = 0;
-	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
-
-	idd->desc5.constant_urb_entry_read_offset = 0;
-	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
-
-	return offset;
-}
-
-static void
-gen8_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2));
-
-	/* general */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-
-	/* stateless data port */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
-		0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* general state buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* dynamic state buffer size */
-	OUT_BATCH(1 << 12 | 1);
-	/* indirect object buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
-	OUT_BATCH(1 << 12 | 1);
-}
-
-static void
-gen8_emit_vfe_state(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 |
-		2 << 8);
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(2 << 16 |
-		2);
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-static void
-gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
-{
-	OUT_BATCH(GEN8_MEDIA_CURBE_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* curbe total data length */
-	OUT_BATCH(64);
-	/* curbe data start address, is relative to the dynamics base address */
-	OUT_BATCH(curbe_buffer);
-}
 
-static void
-gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
-	OUT_BATCH(interface_descriptor);
-}
-
-static void
-gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_STATE_FLUSH | (2 - 2));
-	OUT_BATCH(0);
-}
-
-static void
-gen8_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height)
-{
-	int i, j;
-
-	for (i = 0; i < width / 16; i++) {
-		for (j = 0; j < height / 16; j++) {
-			OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
-
-			/* interface descriptor offset */
-			OUT_BATCH(0);
-
-			/* without indirect data */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* scoreboard */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* inline data (xoffset, yoffset) */
-			OUT_BATCH(x + i * 16);
-			OUT_BATCH(y + j * 16);
-			gen8_emit_media_state_flush(batch);
-		}
-	}
-}
 
 /*
  * This sets up the media pipeline,
@@ -349,7 +63,7 @@ gen8_media_fillfunc(struct intel_batchbuffer *batch,
 	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
 
 	curbe_buffer = gen8_fill_curbe_buffer_data(batch, color);
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst);
+	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
 	/* media pipeline */
diff --git a/lib/media_fill_gen8lp.c b/lib/media_fill_gen8lp.c
index 1f8a4adc..dcc11982 100644
--- a/lib/media_fill_gen8lp.c
+++ b/lib/media_fill_gen8lp.c
@@ -5,7 +5,7 @@
 #include "gen8_media.h"
 #include "intel_reg.h"
 #include "drmtest.h"
-
+#include "gpu_fill.h"
 #include <assert.h>
 
 
@@ -23,286 +23,6 @@ static const uint32_t media_kernel[][4] = {
 	{ 0x07800031, 0x20000a40, 0x0e000e00, 0x82000010 },
 };
 
-static uint32_t
-batch_used(struct intel_batchbuffer *batch)
-{
-	return batch->ptr - batch->buffer;
-}
-
-static uint32_t
-batch_align(struct intel_batchbuffer *batch, uint32_t align)
-{
-	uint32_t offset = batch_used(batch);
-	offset = ALIGN(offset, align);
-	batch->ptr = batch->buffer + offset;
-	return offset;
-}
-
-static void *
-batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
-{
-	uint32_t offset = batch_align(batch, align);
-	batch->ptr += size;
-	return memset(batch->buffer + offset, 0, size);
-}
-
-static uint32_t
-batch_offset(struct intel_batchbuffer *batch, void *ptr)
-{
-	return (uint8_t *)ptr - batch->buffer;
-}
-
-static uint32_t
-batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
-{
-	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
-}
-
-static void
-gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
-{
-	int ret;
-
-	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
-	if (ret == 0)
-		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
-					NULL, 0, 0, 0);
-	igt_assert(ret == 0);
-}
-
-static uint32_t
-gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			uint8_t color)
-{
-	uint8_t *curbe_buffer;
-	uint32_t offset;
-
-	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
-	offset = batch_offset(batch, curbe_buffer);
-	*curbe_buffer = color;
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_surface_state(struct intel_batchbuffer *batch,
-			struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
-{
-	struct gen8_surface_state *ss;
-	uint32_t write_domain, read_domain, offset;
-	int ret;
-
-	if (is_dst) {
-		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
-	} else {
-		write_domain = 0;
-		read_domain = I915_GEM_DOMAIN_SAMPLER;
-	}
-
-	ss = batch_alloc(batch, sizeof(*ss), 64);
-	offset = batch_offset(batch, ss);
-
-	ss->ss0.surface_type = GEN8_SURFACE_2D;
-	ss->ss0.surface_format = format;
-	ss->ss0.render_cache_read_write = 1;
-	ss->ss0.vertical_alignment = 1; /* align 4 */
-	ss->ss0.horizontal_alignment = 1; /* align 4 */
-
-	if (buf->tiling == I915_TILING_X)
-		ss->ss0.tiled_mode = 2;
-	else if (buf->tiling == I915_TILING_Y)
-		ss->ss0.tiled_mode = 3;
-
-	ss->ss8.base_addr = buf->bo->offset;
-
-	ret = drm_intel_bo_emit_reloc(batch->bo,
-				batch_offset(batch, ss) + 8 * 4,
-				buf->bo, 0,
-				read_domain, write_domain);
-	igt_assert(ret == 0);
-
-	ss->ss2.height = igt_buf_height(buf) - 1;
-	ss->ss2.width  = igt_buf_width(buf) - 1;
-	ss->ss3.pitch  = buf->stride - 1;
-
-	ss->ss7.shader_chanel_select_r = 4;
-	ss->ss7.shader_chanel_select_g = 5;
-	ss->ss7.shader_chanel_select_b = 6;
-	ss->ss7.shader_chanel_select_a = 7;
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = batch_alloc(batch, 32, 64);
-	offset = batch_offset(batch, binding_table);
-
-	binding_table[0] = gen8_fill_surface_state(batch, dst, GEN8_SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_media_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size)
-{
-	uint32_t offset;
-
-	offset = batch_copy(batch, kernel, size, 64);
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst)
-{
-	struct gen8_interface_descriptor_data *idd;
-	uint32_t offset;
-	uint32_t binding_table_offset, kernel_offset;
-
-	binding_table_offset = gen8_fill_binding_table(batch, dst);
-	kernel_offset = gen8_fill_media_kernel(batch, media_kernel, sizeof(media_kernel));
-
-	idd = batch_alloc(batch, sizeof(*idd), 64);
-	offset = batch_offset(batch, idd);
-
-	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
-
-	idd->desc2.single_program_flow = 1;
-	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
-
-	idd->desc3.sampler_count = 0;      /* 0 samplers used */
-	idd->desc3.sampler_state_pointer = 0;
-
-	idd->desc4.binding_table_entry_count = 0;
-	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
-
-	idd->desc5.constant_urb_entry_read_offset = 0;
-	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
-
-	return offset;
-}
-
-static void
-gen8_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2));
-
-	/* general */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-
-	/* stateless data port */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
-		0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* general state buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* dynamic state buffer size */
-	OUT_BATCH(1 << 12 | 1);
-	/* indirect object buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
-	OUT_BATCH(1 << 12 | 1);
-}
-
-static void
-gen8_emit_vfe_state(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 |
-		2 << 8);
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(2 << 16 |
-		2);
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-static void
-gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
-{
-	OUT_BATCH(GEN8_MEDIA_CURBE_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* curbe total data length */
-	OUT_BATCH(64);
-	/* curbe data start address, is relative to the dynamics base address */
-	OUT_BATCH(curbe_buffer);
-}
-
-static void
-gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
-	OUT_BATCH(interface_descriptor);
-}
-
-static void
-gen8lp_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height)
-{
-	int i, j;
-
-	for (i = 0; i < width / 16; i++) {
-		for (j = 0; j < height / 16; j++) {
-			OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
-
-			/* interface descriptor offset */
-			OUT_BATCH(0);
-
-			/* without indirect data */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* scoreboard */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* inline data (xoffset, yoffset) */
-			OUT_BATCH(x + i * 16);
-			OUT_BATCH(y + j * 16);
-		}
-	}
-}
-
 /*
  * This sets up the media pipeline,
  *
@@ -341,7 +61,7 @@ gen8lp_media_fillfunc(struct intel_batchbuffer *batch,
 	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
 
 	curbe_buffer = gen8_fill_curbe_buffer_data(batch, color);
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst);
+	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
 	/* media pipeline */
diff --git a/lib/media_fill_gen9.c b/lib/media_fill_gen9.c
index 3fd21819..6accdbe4 100644
--- a/lib/media_fill_gen9.c
+++ b/lib/media_fill_gen9.c
@@ -4,11 +4,9 @@
 #include "media_fill.h"
 #include "gen8_media.h"
 #include "intel_reg.h"
-
+#include "gpu_fill.h"
 #include <assert.h>
 
-#define ALIGN(x, y) (((x) + (y)-1) & ~((y)-1))
-
 static const uint32_t media_kernel[][4] = {
 	{ 0x00400001, 0x20202288, 0x00000020, 0x00000000 },
 	{ 0x00600001, 0x20800208, 0x008d0000, 0x00000000 },
@@ -23,298 +21,6 @@ static const uint32_t media_kernel[][4] = {
 	{ 0x07800031, 0x20000a40, 0x0e000e00, 0x82000010 },
 };
 
-static uint32_t
-batch_used(struct intel_batchbuffer *batch)
-{
-	return batch->ptr - batch->buffer;
-}
-
-static uint32_t
-batch_align(struct intel_batchbuffer *batch, uint32_t align)
-{
-	uint32_t offset = batch_used(batch);
-	offset = ALIGN(offset, align);
-	batch->ptr = batch->buffer + offset;
-	return offset;
-}
-
-static void *
-batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
-{
-	uint32_t offset = batch_align(batch, align);
-	batch->ptr += size;
-	return memset(batch->buffer + offset, 0, size);
-}
-
-static uint32_t
-batch_offset(struct intel_batchbuffer *batch, void *ptr)
-{
-	return (uint8_t *)ptr - batch->buffer;
-}
-
-static uint32_t
-batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
-{
-	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
-}
-
-static void
-gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
-{
-	int ret;
-
-	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
-	if (ret == 0)
-		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
-					NULL, 0, 0, 0);
-	assert(ret == 0);
-}
-
-static uint32_t
-gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			uint8_t color)
-{
-	uint8_t *curbe_buffer;
-	uint32_t offset;
-
-	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
-	offset = batch_offset(batch, curbe_buffer);
-	*curbe_buffer = color;
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_surface_state(struct intel_batchbuffer *batch,
-			struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
-{
-	struct gen8_surface_state *ss;
-	uint32_t write_domain, read_domain, offset;
-	int ret;
-
-	if (is_dst) {
-		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
-	} else {
-		write_domain = 0;
-		read_domain = I915_GEM_DOMAIN_SAMPLER;
-	}
-
-	ss = batch_alloc(batch, sizeof(*ss), 64);
-	offset = batch_offset(batch, ss);
-
-	ss->ss0.surface_type = GEN8_SURFACE_2D;
-	ss->ss0.surface_format = format;
-	ss->ss0.render_cache_read_write = 1;
-	ss->ss0.vertical_alignment = 1; /* align 4 */
-	ss->ss0.horizontal_alignment = 1; /* align 4 */
-
-	if (buf->tiling == I915_TILING_X)
-		ss->ss0.tiled_mode = 2;
-	else if (buf->tiling == I915_TILING_Y)
-		ss->ss0.tiled_mode = 3;
-
-	ss->ss8.base_addr = buf->bo->offset;
-
-	ret = drm_intel_bo_emit_reloc(batch->bo,
-				batch_offset(batch, ss) + 8 * 4,
-				buf->bo, 0,
-				read_domain, write_domain);
-	assert(ret == 0);
-
-	ss->ss2.height = igt_buf_height(buf) - 1;
-	ss->ss2.width  = igt_buf_width(buf) - 1;
-	ss->ss3.pitch  = buf->stride - 1;
-
-	ss->ss7.shader_chanel_select_r = 4;
-	ss->ss7.shader_chanel_select_g = 5;
-	ss->ss7.shader_chanel_select_b = 6;
-	ss->ss7.shader_chanel_select_a = 7;
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = batch_alloc(batch, 32, 64);
-	offset = batch_offset(batch, binding_table);
-
-	binding_table[0] = gen8_fill_surface_state(batch, dst, GEN8_SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_media_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size)
-{
-	uint32_t offset;
-
-	offset = batch_copy(batch, kernel, size, 64);
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst)
-{
-	struct gen8_interface_descriptor_data *idd;
-	uint32_t offset;
-	uint32_t binding_table_offset, kernel_offset;
-
-	binding_table_offset = gen8_fill_binding_table(batch, dst);
-	kernel_offset = gen8_fill_media_kernel(batch, media_kernel, sizeof(media_kernel));
-
-	idd = batch_alloc(batch, sizeof(*idd), 64);
-	offset = batch_offset(batch, idd);
-
-	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
-
-	idd->desc2.single_program_flow = 1;
-	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
-
-	idd->desc3.sampler_count = 0;      /* 0 samplers used */
-	idd->desc3.sampler_state_pointer = 0;
-
-	idd->desc4.binding_table_entry_count = 0;
-	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
-
-	idd->desc5.constant_urb_entry_read_offset = 0;
-	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
-
-	return offset;
-}
-
-static void
-gen9_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (19 - 2));
-
-	/* general */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-
-	/* stateless data port */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
-		0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* general state buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* dynamic state buffer size */
-	OUT_BATCH(1 << 12 | 1);
-	/* indirect object buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
-	OUT_BATCH(1 << 12 | 1);
-
-	/* Bindless surface state base address */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-	OUT_BATCH(0xfffff000);
-}
-
-static void
-gen8_emit_vfe_state(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 |
-		2 << 8);
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(2 << 16 |
-		2);
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-static void
-gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
-{
-	OUT_BATCH(GEN8_MEDIA_CURBE_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* curbe total data length */
-	OUT_BATCH(64);
-	/* curbe data start address, is relative to the dynamics base address */
-	OUT_BATCH(curbe_buffer);
-}
-
-static void
-gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
-	OUT_BATCH(interface_descriptor);
-}
-
-static void
-gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_STATE_FLUSH | (2 - 2));
-	OUT_BATCH(0);
-}
-
-static void
-gen8_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height)
-{
-	int i, j;
-
-	for (i = 0; i < width / 16; i++) {
-		for (j = 0; j < height / 16; j++) {
-			OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
-
-			/* interface descriptor offset */
-			OUT_BATCH(0);
-
-			/* without indirect data */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* scoreboard */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* inline data (xoffset, yoffset) */
-			OUT_BATCH(x + i * 16);
-			OUT_BATCH(y + j * 16);
-			gen8_emit_media_state_flush(batch);
-		}
-	}
-}
 
 /*
  * This sets up the media pipeline,
@@ -354,7 +60,7 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
 	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
 
 	curbe_buffer = gen8_fill_curbe_buffer_data(batch, color);
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst);
+	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
 	assert(batch->ptr < &batch->buffer[4095]);
 
 	/* media pipeline */
diff --git a/lib/meson.build b/lib/meson.build
index b3b8b14a..385e08b9 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -30,6 +30,7 @@ lib_sources = [
 	'media_fill_gen9.c',
 	'media_spin.c',
 	'gpgpu_fill.c',
+	'gpu_fill.c',
 	'rendercopy_i915.c',
 	'rendercopy_i830.c',
 	'rendercopy_gen6.c',
-- 
2.14.3

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [igt-dev] [PATCH i-g-t v4 2/4] lib: Remove duplications in gpu_fill library
  2018-04-09 15:42 [igt-dev] [PATCH i-g-t] lib/gen6_render: Refactoring lib Lukasz Kalamarz
                   ` (4 preceding siblings ...)
  2018-04-10 10:48 ` [igt-dev] [PATCH i-g-t v4 1/4] lib: Move common gpgpu/media fill functions to gpu_fill library Katarzyna Dec
@ 2018-04-10 10:48 ` Katarzyna Dec
  2018-04-10 10:48 ` [igt-dev] [PATCH i-g-t v4 3/4] lib/gpgpu_fill: Add missing configuration parameters for gpgpu_fill Katarzyna Dec
                   ` (9 subsequent siblings)
  15 siblings, 0 replies; 28+ messages in thread
From: Katarzyna Dec @ 2018-04-10 10:48 UTC (permalink / raw)
  To: igt-dev

After moving all functions needed for gpgpu and media fill testing,
there are a lot of duplications which can be removed:
  Library media_fill_gen8 and media_fill_gen8lp for CHT was removed,
media state flush for !CHT was added to gen7_emit_media_objects.
  Many gen8 functions were replaced with gen7 version with devid
parameter (gen7_fill_curbe_load, gen7_emit_interface_descriptor,
gen7_fill_binding_table, gen7_emit_media_objects). Unified fill kernel
function so it is applicable to all gens and both media and gpgpu
(merged gen7_fill_media_kernel and gen8_fill_media_kernel).
  Duplicated constants like GEN8_MEDIA_VFE_STATE, GEN8_MEDIA_CURBE_LOAD,
GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD, GEN8_MEDIA_OBJECT were
replaced by their GEN7 versions. However, these constants were not removed
from the gen8_media.h library, because they are used by other tests
for Gen8+. More refactoring in these gen*_media.h libraries is needed.

It seems that further unification of *_fillfunc functions will
introduce more confusion in understanding what the tests are doing
and what were changes between Gens.

v2: Moved some redundant changes from "Move gpgpu/media fill to gpu_fill..."
to this patch. Applied comments from review.

v3: rebase

Signed-off-by: Katarzyna Dec <katarzyna.dec@intel.com>
Cc: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 lib/Makefile.sources    |   1 -
 lib/gpgpu_fill.c        |   2 +-
 lib/gpu_fill.c          | 172 +++++++-----------------------------------------
 lib/gpu_fill.h          |  38 +----------
 lib/intel_batchbuffer.c |   4 +-
 lib/media_fill.h        |   7 --
 lib/media_fill_gen8.c   |  10 +--
 lib/media_fill_gen8lp.c |  87 ------------------------
 lib/media_fill_gen9.c   |  10 +--
 lib/meson.build         |   1 -
 10 files changed, 39 insertions(+), 293 deletions(-)
 delete mode 100644 lib/media_fill_gen8lp.c

diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index 45e65dd7..9c0150c1 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -58,7 +58,6 @@ lib_source_list =	 	\
 	media_fill.h            \
 	media_fill_gen7.c       \
 	media_fill_gen8.c       \
-	media_fill_gen8lp.c     \
 	media_fill_gen9.c       \
 	media_spin.h		\
 	media_spin.c		\
diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
index f2765fd6..579ce78d 100644
--- a/lib/gpgpu_fill.c
+++ b/lib/gpgpu_fill.c
@@ -180,7 +180,7 @@ gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 	gen8_emit_state_base_address(batch);
 	gen8_emit_vfe_state_gpgpu(batch);
 	gen7_emit_curbe_load(batch, curbe_buffer);
-	gen8_emit_interface_descriptor_load(batch, interface_descriptor);
+	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
 	gen8_emit_gpgpu_walk(batch, x, y, width, height);
 
 	OUT_BATCH(MI_BATCH_BUFFER_END);
diff --git a/lib/gpu_fill.c b/lib/gpu_fill.c
index 172c6db6..7d99dfd9 100644
--- a/lib/gpu_fill.c
+++ b/lib/gpu_fill.c
@@ -118,26 +118,18 @@ gen7_fill_binding_table(struct intel_batchbuffer *batch,
 
 	binding_table = batch_alloc(batch, 32, 64);
 	offset = batch_offset(batch, binding_table);
-	binding_table[0] = gen7_fill_surface_state(batch, dst,
+	if (IS_GEN7(batch->devid))
+		binding_table[0] = gen7_fill_surface_state(batch, dst,
 						GEN7_SURFACEFORMAT_R8_UNORM, 1);
+	else
+		binding_table[0] = gen8_fill_surface_state(batch, dst,
+						GEN8_SURFACEFORMAT_R8_UNORM, 1);
 
 	return offset;
 }
 
 uint32_t
-gen7_fill_media_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size)
-{
-	uint32_t offset;
-
-	offset = batch_copy(batch, kernel, size, 64);
-
-	return offset;
-}
-
-uint32_t
-gen8_fill_media_kernel(struct intel_batchbuffer *batch,
+gen7_fill_kernel(struct intel_batchbuffer *batch,
 		const uint32_t kernel[][4],
 		size_t size)
 {
@@ -157,7 +149,7 @@ gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *
 	uint32_t binding_table_offset, kernel_offset;
 
 	binding_table_offset = gen7_fill_binding_table(batch, dst);
-	kernel_offset = gen7_fill_media_kernel(batch, kernel, size);
+	kernel_offset = gen7_fill_kernel(batch, kernel, size);
 
 	idd = batch_alloc(batch, sizeof(*idd), 64);
 	offset = batch_offset(batch, idd);
@@ -272,7 +264,10 @@ gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t in
 	OUT_BATCH(GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
 	OUT_BATCH(0);
 	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
+	if (IS_GEN7(batch->devid))
+		OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
+	else
+		OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
 	/* interface descriptor address, is relative to the dynamics base address */
 	OUT_BATCH(interface_descriptor);
 }
@@ -302,6 +297,8 @@ gen7_emit_media_objects(struct intel_batchbuffer *batch,
 			/* inline data (xoffset, yoffset) */
 			OUT_BATCH(x + i * 16);
 			OUT_BATCH(y + j * 16);
+			if (AT_LEAST_GEN(batch->devid, 8) && !IS_CHERRYVIEW(batch->devid))
+				gen8_emit_media_state_flush(batch);
 		}
 	}
 }
@@ -363,33 +360,6 @@ gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
 	OUT_BATCH(0xffffffff);
 }
 
-void
-gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
-{
-	int ret;
-
-	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
-	if (ret == 0)
-		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
-					NULL, 0, 0, 0);
-	igt_assert(ret == 0);
-}
-
-
-uint32_t
-gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			uint8_t color)
-{
-	uint8_t *curbe_buffer;
-	uint32_t offset;
-
-	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
-	offset = batch_offset(batch, curbe_buffer);
-	*curbe_buffer = color;
-
-	return offset;
-}
-
 uint32_t
 gen8_fill_surface_state(struct intel_batchbuffer *batch,
 			struct igt_buf *buf,
@@ -441,21 +411,6 @@ gen8_fill_surface_state(struct intel_batchbuffer *batch,
 	return offset;
 }
 
-uint32_t
-gen8_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = batch_alloc(batch, 32, 64);
-	offset = batch_offset(batch, binding_table);
-
-	binding_table[0] = gen8_fill_surface_state(batch, dst,
-						GEN8_SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
 uint32_t
 gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,  const uint32_t kernel[][4], size_t size)
 {
@@ -463,8 +418,8 @@ gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *
 	uint32_t offset;
 	uint32_t binding_table_offset, kernel_offset;
 
-	binding_table_offset = gen8_fill_binding_table(batch, dst);
-	kernel_offset = gen8_fill_media_kernel(batch, kernel, size);
+	binding_table_offset = gen7_fill_binding_table(batch, dst);
+	kernel_offset = gen7_fill_kernel(batch, kernel, size);
 
 	idd = batch_alloc(batch, sizeof(*idd), 64);
 	offset = batch_offset(batch, idd);
@@ -522,10 +477,17 @@ gen8_emit_state_base_address(struct intel_batchbuffer *batch)
 	OUT_BATCH(1 << 12 | 1);
 }
 
+void
+gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_MEDIA_STATE_FLUSH | (2 - 2));
+	OUT_BATCH(0);
+}
+
 void
 gen8_emit_vfe_state(struct intel_batchbuffer *batch)
 {
-	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
+	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (9 - 2));
 
 	/* scratch buffer */
 	OUT_BATCH(0);
@@ -550,7 +512,7 @@ gen8_emit_vfe_state(struct intel_batchbuffer *batch)
 void
 gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
 {
-	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
+	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (9 - 2));
 
 	/* scratch buffer */
 	OUT_BATCH(0);
@@ -570,92 +532,6 @@ gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
 	OUT_BATCH(0);
 }
 
-void
-gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
-{
-	OUT_BATCH(GEN8_MEDIA_CURBE_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* curbe total data length */
-	OUT_BATCH(64);
-	/* curbe data start address, is relative to the dynamics base address */
-	OUT_BATCH(curbe_buffer);
-}
-
-void
-gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
-	OUT_BATCH(interface_descriptor);
-}
-
-void
-gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_STATE_FLUSH | (2 - 2));
-	OUT_BATCH(0);
-}
-
-void
-gen8_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height)
-{
-	int i, j;
-
-	for (i = 0; i < width / 16; i++) {
-		for (j = 0; j < height / 16; j++) {
-			OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
-
-			/* interface descriptor offset */
-			OUT_BATCH(0);
-
-			/* without indirect data */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* scoreboard */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* inline data (xoffset, yoffset) */
-			OUT_BATCH(x + i * 16);
-			OUT_BATCH(y + j * 16);
-			gen8_emit_media_state_flush(batch);
-		}
-	}
-}
-void
-gen8lp_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height)
-{
-	int i, j;
-
-	for (i = 0; i < width / 16; i++) {
-		for (j = 0; j < height / 16; j++) {
-			OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
-
-			/* interface descriptor offset */
-			OUT_BATCH(0);
-
-			/* without indirect data */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* scoreboard */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* inline data (xoffset, yoffset) */
-			OUT_BATCH(x + i * 16);
-			OUT_BATCH(y + j * 16);
-		}
-	}
-}
 void
 gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
 		     unsigned x, unsigned y,
diff --git a/lib/gpu_fill.h b/lib/gpu_fill.h
index 87e62c86..072e9f7c 100644
--- a/lib/gpu_fill.h
+++ b/lib/gpu_fill.h
@@ -70,12 +70,7 @@ gen7_fill_binding_table(struct intel_batchbuffer *batch,
 			struct igt_buf *dst);
 
 uint32_t
-gen7_fill_media_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size);
-
-uint32_t
-gen8_fill_media_kernel(struct intel_batchbuffer *batch,
+gen7_fill_kernel(struct intel_batchbuffer *batch,
 		const uint32_t kernel[][4],
 		size_t size);
 
@@ -108,53 +103,26 @@ gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
 		     unsigned x, unsigned y,
 		     unsigned width, unsigned height);
 
-void
-gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end);
-
-uint32_t
-gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			uint8_t color);
-
 uint32_t
 gen8_fill_surface_state(struct intel_batchbuffer *batch,
 			struct igt_buf *buf,
 			uint32_t format,
 			int is_dst);
 
-uint32_t
-gen8_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst);
-
 uint32_t
 gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,  const uint32_t kernel[][4], size_t size);
 
 void
 gen8_emit_state_base_address(struct intel_batchbuffer *batch);
 
-void
-gen8_emit_vfe_state(struct intel_batchbuffer *batch);
-
-void
-gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
-
-void
-gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer);
-
-void
-gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor);
-
 void
 gen8_emit_media_state_flush(struct intel_batchbuffer *batch);
 
 void
-gen8_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height);
+gen8_emit_vfe_state(struct intel_batchbuffer *batch);
 
 void
-gen8lp_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height);
+gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
 
 void
 gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index 7c04ccf3..10d4dce8 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -796,12 +796,10 @@ igt_fillfunc_t igt_get_media_fillfunc(int devid)
 
 	if (IS_GEN9(devid))
 		fill = gen9_media_fillfunc;
-	else if (IS_BROADWELL(devid))
+	else if (IS_GEN8(devid))
 		fill = gen8_media_fillfunc;
 	else if (IS_GEN7(devid))
 		fill = gen7_media_fillfunc;
-	else if (IS_CHERRYVIEW(devid))
-		fill = gen8lp_media_fillfunc;
 
 	return fill;
 }
diff --git a/lib/media_fill.h b/lib/media_fill.h
index 226489cb..161af8cf 100644
--- a/lib/media_fill.h
+++ b/lib/media_fill.h
@@ -18,13 +18,6 @@ gen7_media_fillfunc(struct intel_batchbuffer *batch,
                 unsigned width, unsigned height,
                 uint8_t color);
 
-void
-gen8lp_media_fillfunc(struct intel_batchbuffer *batch,
-		struct igt_buf *dst,
-		unsigned x, unsigned y,
-		unsigned width, unsigned height,
-		uint8_t color);
-
 void
 gen9_media_fillfunc(struct intel_batchbuffer *batch,
                 struct igt_buf *dst,
diff --git a/lib/media_fill_gen8.c b/lib/media_fill_gen8.c
index 4270997e..362abd61 100644
--- a/lib/media_fill_gen8.c
+++ b/lib/media_fill_gen8.c
@@ -62,7 +62,7 @@ gen8_media_fillfunc(struct intel_batchbuffer *batch,
 	/* setup states */
 	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
 
-	curbe_buffer = gen8_fill_curbe_buffer_data(batch, color);
+	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
 	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
@@ -73,17 +73,17 @@ gen8_media_fillfunc(struct intel_batchbuffer *batch,
 
 	gen8_emit_vfe_state(batch);
 
-	gen8_emit_curbe_load(batch, curbe_buffer);
+	gen7_emit_curbe_load(batch, curbe_buffer);
 
-	gen8_emit_interface_descriptor_load(batch, interface_descriptor);
+	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
 
-	gen8_emit_media_objects(batch, x, y, width, height);
+	gen7_emit_media_objects(batch, x, y, width, height);
 
 	OUT_BATCH(MI_BATCH_BUFFER_END);
 
 	batch_end = batch_align(batch, 8);
 	igt_assert(batch_end < BATCH_STATE_SPLIT);
 
-	gen8_render_flush(batch, batch_end);
+	gen7_render_flush(batch, batch_end);
 	intel_batchbuffer_reset(batch);
 }
diff --git a/lib/media_fill_gen8lp.c b/lib/media_fill_gen8lp.c
deleted file mode 100644
index dcc11982..00000000
--- a/lib/media_fill_gen8lp.c
+++ /dev/null
@@ -1,87 +0,0 @@
-#include <intel_bufmgr.h>
-#include <i915_drm.h>
-
-#include "media_fill.h"
-#include "gen8_media.h"
-#include "intel_reg.h"
-#include "drmtest.h"
-#include "gpu_fill.h"
-#include <assert.h>
-
-
-static const uint32_t media_kernel[][4] = {
-	{ 0x00400001, 0x20202288, 0x00000020, 0x00000000 },
-	{ 0x00600001, 0x20800208, 0x008d0000, 0x00000000 },
-	{ 0x00200001, 0x20800208, 0x00450040, 0x00000000 },
-	{ 0x00000001, 0x20880608, 0x00000000, 0x000f000f },
-	{ 0x00800001, 0x20a00208, 0x00000020, 0x00000000 },
-	{ 0x00800001, 0x20e00208, 0x00000020, 0x00000000 },
-	{ 0x00800001, 0x21200208, 0x00000020, 0x00000000 },
-	{ 0x00800001, 0x21600208, 0x00000020, 0x00000000 },
-	{ 0x0c800031, 0x24000a40, 0x0e000080, 0x120a8000 },
-	{ 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 },
-	{ 0x07800031, 0x20000a40, 0x0e000e00, 0x82000010 },
-};
-
-/*
- * This sets up the media pipeline,
- *
- * +---------------+ <---- 4096
- * |       ^       |
- * |       |       |
- * |    various    |
- * |      state    |
- * |       |       |
- * |_______|_______| <---- 2048 + ?
- * |       ^       |
- * |       |       |
- * |   batch       |
- * |    commands   |
- * |       |       |
- * |       |       |
- * +---------------+ <---- 0 + ?
- *
- */
-
-#define BATCH_STATE_SPLIT 2048
-
-void
-gen8lp_media_fillfunc(struct intel_batchbuffer *batch,
-		struct igt_buf *dst,
-		unsigned x, unsigned y,
-		unsigned width, unsigned height,
-		uint8_t color)
-{
-	uint32_t curbe_buffer, interface_descriptor;
-	uint32_t batch_end;
-
-	intel_batchbuffer_flush(batch);
-
-	/* setup states */
-	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
-
-	curbe_buffer = gen8_fill_curbe_buffer_data(batch, color);
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
-	igt_assert(batch->ptr < &batch->buffer[4095]);
-
-	/* media pipeline */
-	batch->ptr = batch->buffer;
-	OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
-	gen8_emit_state_base_address(batch);
-
-	gen8_emit_vfe_state(batch);
-
-	gen8_emit_curbe_load(batch, curbe_buffer);
-
-	gen8_emit_interface_descriptor_load(batch, interface_descriptor);
-
-	gen8lp_emit_media_objects(batch, x, y, width, height);
-
-	OUT_BATCH(MI_BATCH_BUFFER_END);
-
-	batch_end = batch_align(batch, 8);
-	igt_assert(batch_end < BATCH_STATE_SPLIT);
-
-	gen8_render_flush(batch, batch_end);
-	intel_batchbuffer_reset(batch);
-}
diff --git a/lib/media_fill_gen9.c b/lib/media_fill_gen9.c
index 6accdbe4..d1335fe6 100644
--- a/lib/media_fill_gen9.c
+++ b/lib/media_fill_gen9.c
@@ -59,7 +59,7 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
 	/* setup states */
 	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
 
-	curbe_buffer = gen8_fill_curbe_buffer_data(batch, color);
+	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
 	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
 	assert(batch->ptr < &batch->buffer[4095]);
 
@@ -75,11 +75,11 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
 
 	gen8_emit_vfe_state(batch);
 
-	gen8_emit_curbe_load(batch, curbe_buffer);
+	gen7_emit_curbe_load(batch, curbe_buffer);
 
-	gen8_emit_interface_descriptor_load(batch, interface_descriptor);
+	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
 
-	gen8_emit_media_objects(batch, x, y, width, height);
+	gen7_emit_media_objects(batch, x, y, width, height);
 
 	OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
 			GEN9_FORCE_MEDIA_AWAKE_DISABLE |
@@ -93,6 +93,6 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
 	batch_end = batch_align(batch, 8);
 	assert(batch_end < BATCH_STATE_SPLIT);
 
-	gen8_render_flush(batch, batch_end);
+	gen7_render_flush(batch, batch_end);
 	intel_batchbuffer_reset(batch);
 }
diff --git a/lib/meson.build b/lib/meson.build
index 385e08b9..5f2567fb 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -26,7 +26,6 @@ lib_sources = [
 	'ioctl_wrappers.c',
 	'media_fill_gen7.c',
 	'media_fill_gen8.c',
-	'media_fill_gen8lp.c',
 	'media_fill_gen9.c',
 	'media_spin.c',
 	'gpgpu_fill.c',
-- 
2.14.3

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [igt-dev] [PATCH i-g-t v4 3/4] lib/gpgpu_fill: Add missing configuration parameters for gpgpu_fill
  2018-04-09 15:42 [igt-dev] [PATCH i-g-t] lib/gen6_render: Refactoring lib Lukasz Kalamarz
                   ` (5 preceding siblings ...)
  2018-04-10 10:48 ` [igt-dev] [PATCH i-g-t v4 2/4] lib: Remove duplications in " Katarzyna Dec
@ 2018-04-10 10:48 ` Katarzyna Dec
  2018-04-10 10:48 ` [igt-dev] [PATCH i-g-t v4 4/4] lib: Adjust refactored gpu_fill library to our coding style Katarzyna Dec
                   ` (8 subsequent siblings)
  15 siblings, 0 replies; 28+ messages in thread
From: Katarzyna Dec @ 2018-04-10 10:48 UTC (permalink / raw)
  To: igt-dev

Some parameters are missing from the Gen8 configuration of gpgpu_fill,
which causes GPU hangs on newer hardware. We need to set the number of
threads in TG in gen8_fill_interface_descriptor. This field was
omitted (apparently without any side effects), but according to bspec,
from BDW onwards this field cannot be set to 0. We also need to set the
pipeline selection mask in gen9_gpgpu_fillfunc, which is necessary
from SKL onwards.

v2: rebased on refactored library
v3: Dropped the replacement of gen7_emit_interface_descriptor_load with
the gen8 version in gen9_gpgpu_fillfunc, because the gen8 function was
removed during refactoring.
v4: rebase on series new version

Signed-off-by: Katarzyna Dec <katarzyna.dec@intel.com>
Cc: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 lib/gpgpu_fill.c | 3 ++-
 lib/gpu_fill.c   | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
index 579ce78d..5a77ebd4 100644
--- a/lib/gpgpu_fill.c
+++ b/lib/gpgpu_fill.c
@@ -223,7 +223,8 @@ gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 	batch->ptr = batch->buffer;
 
 	/* GPGPU pipeline */
-	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU);
+	OUT_BATCH(GEN7_PIPELINE_SELECT | GEN9_PIPELINE_SELECTION_MASK |
+		  PIPELINE_SELECT_GPGPU);
 
 	gen9_emit_state_base_address(batch);
 	gen8_emit_vfe_state_gpgpu(batch);
diff --git a/lib/gpu_fill.c b/lib/gpu_fill.c
index 7d99dfd9..102f141b 100644
--- a/lib/gpu_fill.c
+++ b/lib/gpu_fill.c
@@ -438,6 +438,8 @@ gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *
 	idd->desc5.constant_urb_entry_read_offset = 0;
 	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
 
+	idd->desc6.num_threads_in_tg = 1;
+
 	return offset;
 }
 
-- 
2.14.3

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [igt-dev] [PATCH i-g-t v4 4/4] lib: Adjust refactored gpu_fill library to our coding style
  2018-04-09 15:42 [igt-dev] [PATCH i-g-t] lib/gen6_render: Refactoring lib Lukasz Kalamarz
                   ` (6 preceding siblings ...)
  2018-04-10 10:48 ` [igt-dev] [PATCH i-g-t v4 3/4] lib/gpgpu_fill: Add missing configuration parameters for gpgpu_fill Katarzyna Dec
@ 2018-04-10 10:48 ` Katarzyna Dec
  2018-04-10 11:28 ` [igt-dev] [PATCH i-g-t v2] lib/gen6_render: Refactoring lib Lukasz Kalamarz
                   ` (7 subsequent siblings)
  15 siblings, 0 replies; 28+ messages in thread
From: Katarzyna Dec @ 2018-04-10 10:48 UTC (permalink / raw)
  To: igt-dev

While I am making changes in the gpgpu and media fill area, let's
adjust the code to our coding style.

v2: rebased on the new version of the series (this patch is now the
last one in the series, so the change seems larger)
v3: rebased

Signed-off-by: Katarzyna Dec <katarzyna.dec@intel.com>
Cc: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 lib/gpgpu_fill.c      |  24 ++++-----
 lib/gpgpu_fill.h      |  12 ++---
 lib/gpu_fill.c        | 142 +++++++++++++++++++++++---------------------------
 lib/media_fill.h      |  20 +++----
 lib/media_fill_gen7.c |   7 ++-
 lib/media_fill_gen8.c |   7 +--
 lib/media_fill_gen9.c |   7 +--
 7 files changed, 105 insertions(+), 114 deletions(-)

diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
index 5a77ebd4..72a1445a 100644
--- a/lib/gpgpu_fill.c
+++ b/lib/gpgpu_fill.c
@@ -99,8 +99,8 @@ static const uint32_t gen9_gpgpu_kernel[][4] = {
 void
 gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 		    struct igt_buf *dst,
-		    unsigned x, unsigned y,
-		    unsigned width, unsigned height,
+		    unsigned int x, unsigned int y,
+		    unsigned int width, unsigned int height,
 		    uint8_t color)
 {
 	uint32_t curbe_buffer, interface_descriptor;
@@ -120,8 +120,8 @@ gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
 
 	interface_descriptor = gen7_fill_interface_descriptor(batch, dst,
-							      gen7_gpgpu_kernel,
-							      sizeof(gen7_gpgpu_kernel));
+				gen7_gpgpu_kernel, sizeof(gen7_gpgpu_kernel));
+
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
 	batch->ptr = batch->buffer;
@@ -147,8 +147,8 @@ gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 void
 gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 		    struct igt_buf *dst,
-		    unsigned x, unsigned y,
-		    unsigned width, unsigned height,
+		    unsigned int x, unsigned int y,
+		    unsigned int width, unsigned int height,
 		    uint8_t color)
 {
 	uint32_t curbe_buffer, interface_descriptor;
@@ -168,8 +168,8 @@ gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
 
 	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
-							      gen8_gpgpu_kernel,
-							      sizeof(gen8_gpgpu_kernel));
+				gen8_gpgpu_kernel, sizeof(gen8_gpgpu_kernel));
+
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
 	batch->ptr = batch->buffer;
@@ -195,8 +195,8 @@ gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 void
 gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 		    struct igt_buf *dst,
-		    unsigned x, unsigned y,
-		    unsigned width, unsigned height,
+		    unsigned int x, unsigned int y,
+		    unsigned int width, unsigned int height,
 		    uint8_t color)
 {
 	uint32_t curbe_buffer, interface_descriptor;
@@ -216,8 +216,8 @@ gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
 
 	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
-							      gen9_gpgpu_kernel,
-							      sizeof(gen9_gpgpu_kernel));
+				gen9_gpgpu_kernel, sizeof(gen9_gpgpu_kernel));
+
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
 	batch->ptr = batch->buffer;
diff --git a/lib/gpgpu_fill.h b/lib/gpgpu_fill.h
index 7b5c8322..f0d188ae 100644
--- a/lib/gpgpu_fill.h
+++ b/lib/gpgpu_fill.h
@@ -30,22 +30,22 @@
 void
 gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 		    struct igt_buf *dst,
-		    unsigned x, unsigned y,
-		    unsigned width, unsigned height,
+		    unsigned int x, unsigned int y,
+		    unsigned int width, unsigned int height,
 		    uint8_t color);
 
 void
 gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 		    struct igt_buf *dst,
-		    unsigned x, unsigned y,
-		    unsigned width, unsigned height,
+		    unsigned int x, unsigned int y,
+		    unsigned int width, unsigned int height,
 		    uint8_t color);
 
 void
 gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 		    struct igt_buf *dst,
-		    unsigned x, unsigned y,
-		    unsigned width, unsigned height,
+		    unsigned int x, unsigned int y,
+		    unsigned int width, unsigned int height,
 		    uint8_t color);
 
 #endif /* GPGPU_FILL_H */
diff --git a/lib/gpu_fill.c b/lib/gpu_fill.c
index 102f141b..fc28a945 100644
--- a/lib/gpu_fill.c
+++ b/lib/gpu_fill.c
@@ -10,6 +10,7 @@ uint32_t
 batch_align(struct intel_batchbuffer *batch, uint32_t align)
 {
 	uint32_t offset = batch_used(batch);
+
 	offset = ALIGN(offset, align);
 	batch->ptr = batch->buffer + offset;
 	return offset;
@@ -19,6 +20,7 @@ void *
 batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
 {
 	uint32_t offset = batch_align(batch, align);
+
 	batch->ptr += size;
 	return memset(batch->buffer + offset, 0, size);
 }
@@ -30,9 +32,11 @@ batch_offset(struct intel_batchbuffer *batch, void *ptr)
 }
 
 uint32_t
-batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
+batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size,
+	   uint32_t align)
 {
-	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
+	return batch_offset(batch, memcpy(batch_alloc(batch, size, align),
+			    ptr, size));
 }
 
 void
@@ -43,13 +47,13 @@ gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
 	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
 	if (ret == 0)
 		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
-					NULL, 0, 0, 0);
+					    NULL, 0, 0, 0);
 	igt_assert(ret == 0);
 }
 
 uint32_t
 gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			uint8_t color)
+			    uint8_t color)
 {
 	uint8_t *curbe_buffer;
 	uint32_t offset;
@@ -62,10 +66,8 @@ gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
 }
 
 uint32_t
-gen7_fill_surface_state(struct intel_batchbuffer *batch,
-			struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
+gen7_fill_surface_state(struct intel_batchbuffer *batch, struct igt_buf *buf,
+			uint32_t format, int is_dst)
 {
 	struct gen7_surface_state *ss;
 	uint32_t write_domain, read_domain, offset;
@@ -111,8 +113,7 @@ gen7_fill_surface_state(struct intel_batchbuffer *batch,
 }
 
 uint32_t
-gen7_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
+gen7_fill_binding_table(struct intel_batchbuffer *batch, struct igt_buf *dst)
 {
 	uint32_t *binding_table, offset;
 
@@ -129,9 +130,8 @@ gen7_fill_binding_table(struct intel_batchbuffer *batch,
 }
 
 uint32_t
-gen7_fill_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size)
+gen7_fill_kernel(struct intel_batchbuffer *batch, const uint32_t kernel[][4],
+		 size_t size)
 {
 	uint32_t offset;
 
@@ -141,8 +141,9 @@ gen7_fill_kernel(struct intel_batchbuffer *batch,
 }
 
 uint32_t
-gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,
-			       const uint32_t kernel[][4], size_t size)
+gen7_fill_interface_descriptor(struct intel_batchbuffer *batch,
+			       struct igt_buf *dst, const uint32_t kernel[][4],
+			       size_t size)
 {
 	struct gen7_interface_descriptor_data *idd;
 	uint32_t offset;
@@ -180,16 +181,19 @@ gen7_emit_state_base_address(struct intel_batchbuffer *batch)
 	OUT_BATCH(0);
 
 	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+		  BASE_ADDRESS_MODIFY);
 
 	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+		  BASE_ADDRESS_MODIFY);
 
 	/* indirect */
 	OUT_BATCH(0);
 
 	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+		  BASE_ADDRESS_MODIFY);
 
 	/* general/dynamic/indirect/instruction access Bound */
 	OUT_BATCH(0);
@@ -214,7 +218,7 @@ gen7_emit_vfe_state(struct intel_batchbuffer *batch)
 
 	/* urb entry size & curbe size */
 	OUT_BATCH(2 << 16 | 	/* in 256 bits unit */
-		2);		/* in 256 bits unit */
+		  2);		/* in 256 bits unit */
 
 	/* scoreboard */
 	OUT_BATCH(0);
@@ -268,14 +272,16 @@ gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t in
 		OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
 	else
 		OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
+	/* interface descriptor address, is relative to the dynamics base
+	 * address
+	 */
 	OUT_BATCH(interface_descriptor);
 }
 
 void
 gen7_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height)
+			unsigned int x, unsigned int y,
+			unsigned int width, unsigned int height)
 {
 	int i, j;
 
@@ -297,7 +303,8 @@ gen7_emit_media_objects(struct intel_batchbuffer *batch,
 			/* inline data (xoffset, yoffset) */
 			OUT_BATCH(x + i * 16);
 			OUT_BATCH(y + j * 16);
-			if (AT_LEAST_GEN(batch->devid, 8) && !IS_CHERRYVIEW(batch->devid))
+			if (AT_LEAST_GEN(batch->devid, 8) &&
+			    !IS_CHERRYVIEW(batch->devid))
 				gen8_emit_media_state_flush(batch);
 		}
 	}
@@ -305,8 +312,8 @@ gen7_emit_media_objects(struct intel_batchbuffer *batch,
 
 void
 gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
-		     unsigned x, unsigned y,
-		     unsigned width, unsigned height)
+		     unsigned int x, unsigned int y,
+		     unsigned int width, unsigned int height)
 {
 	uint32_t x_dim, y_dim, tmp, right_mask;
 
@@ -459,8 +466,8 @@ gen8_emit_state_base_address(struct intel_batchbuffer *batch)
 	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
 
 	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
-		0, BASE_ADDRESS_MODIFY);
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER |
+		  I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
 
 	/* indirect */
 	OUT_BATCH(0);
@@ -475,7 +482,9 @@ gen8_emit_state_base_address(struct intel_batchbuffer *batch)
 	OUT_BATCH(1 << 12 | 1);
 	/* indirect object buffer size */
 	OUT_BATCH(0xfffff000 | 1);
-	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
+	/* instruction buffer size, must set modify enable bit, otherwise it
+	 * may result in GPU hang
+	 */
 	OUT_BATCH(1 << 12 | 1);
 }
 
@@ -536,62 +545,41 @@ gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
 
 void
 gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
-		     unsigned x, unsigned y,
-		     unsigned width, unsigned height)
+		     unsigned int x, unsigned int y,
+		     unsigned int width, unsigned int height)
 {
-	uint32_t x_dim, y_dim, tmp, right_mask;
-
-	/*
-	 * Simply do SIMD16 based dispatch, so every thread uses
-	 * SIMD16 channels.
-	 *
-	 * Define our own thread group size, e.g 16x1 for every group, then
-	 * will have 1 thread each group in SIMD16 dispatch. So thread
-	 * width/height/depth are all 1.
-	 *
-	 * Then thread group X = width / 16 (aligned to 16)
-	 * thread group Y = height;
-	 */
-	x_dim = (width + 15) / 16;
-	y_dim = height;
-
-	tmp = width & 15;
-	if (tmp == 0)
-		right_mask = (1 << 16) - 1;
-	else
-		right_mask = (1 << tmp) - 1;
-
-	OUT_BATCH(GEN7_GPGPU_WALKER | 13);
+	/* general */
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
 
-	OUT_BATCH(0); /* kernel offset */
-	OUT_BATCH(0); /* indirect data length */
-	OUT_BATCH(0); /* indirect data offset */
+	/* stateless data port */
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
 
-	/* SIMD size, thread w/h/d */
-	OUT_BATCH(1 << 30 | /* SIMD16 */
-		  0 << 16 | /* depth:1 */
-		  0 << 8 | /* height:1 */
-		  0); /* width:1 */
+	/* surface */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
 
-	/* thread group X */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(x_dim);
+	/* dynamic */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER |
+		  I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
 
-	/* thread group Y */
-	OUT_BATCH(0);
+	/* indirect */
 	OUT_BATCH(0);
-	OUT_BATCH(y_dim);
-
-	/* thread group Z */
 	OUT_BATCH(0);
-	OUT_BATCH(1);
 
-	/* right mask */
-	OUT_BATCH(right_mask);
+	/* instruction */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+		  BASE_ADDRESS_MODIFY);
 
-	/* bottom mask, height 1, always 0xffffffff */
-	OUT_BATCH(0xffffffff);
+	/* general state buffer size */
+	OUT_BATCH(0xfffff000 | 1);
+	/* dynamic state buffer size */
+	OUT_BATCH(1 << 12 | 1);
+	/* indirect object buffer size */
+	OUT_BATCH(0xfffff000 | 1);
+	/* instruction buffer size, must set modify enable bit, otherwise it
+	 * may result in GPU hang
+	 */
+	OUT_BATCH(1 << 12 | 1);
 }
 
 void
@@ -626,7 +614,9 @@ gen9_emit_state_base_address(struct intel_batchbuffer *batch)
 	OUT_BATCH(1 << 12 | 1);
 	/* indirect object buffer size */
 	OUT_BATCH(0xfffff000 | 1);
-	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
+	/* intruction buffer size, must set modify enable bit, otherwise it may
+	 * result in GPU hang
+	 */
 	OUT_BATCH(1 << 12 | 1);
 
 	/* Bindless surface state base address */
diff --git a/lib/media_fill.h b/lib/media_fill.h
index 161af8cf..f6db734e 100644
--- a/lib/media_fill.h
+++ b/lib/media_fill.h
@@ -7,22 +7,22 @@
 void
 gen8_media_fillfunc(struct intel_batchbuffer *batch,
 		struct igt_buf *dst,
-		unsigned x, unsigned y,
-		unsigned width, unsigned height,
+		unsigned int x, unsigned int y,
+		unsigned int width, unsigned int height,
 		uint8_t color);
 
 void
 gen7_media_fillfunc(struct intel_batchbuffer *batch,
-                struct igt_buf *dst,
-                unsigned x, unsigned y,
-                unsigned width, unsigned height,
-                uint8_t color);
+		struct igt_buf *dst,
+		unsigned int x, unsigned int y,
+		unsigned int width, unsigned int height,
+		uint8_t color);
 
 void
 gen9_media_fillfunc(struct intel_batchbuffer *batch,
-                struct igt_buf *dst,
-                unsigned x, unsigned y,
-                unsigned width, unsigned height,
-                uint8_t color);
+		struct igt_buf *dst,
+		unsigned int x, unsigned int y,
+		unsigned int width, unsigned int height,
+		uint8_t color);
 
 #endif /* RENDE_MEDIA_FILL_H */
diff --git a/lib/media_fill_gen7.c b/lib/media_fill_gen7.c
index c97555a6..5a8c32fb 100644
--- a/lib/media_fill_gen7.c
+++ b/lib/media_fill_gen7.c
@@ -47,8 +47,8 @@ static const uint32_t media_kernel[][4] = {
 void
 gen7_media_fillfunc(struct intel_batchbuffer *batch,
 		struct igt_buf *dst,
-		unsigned x, unsigned y,
-		unsigned width, unsigned height,
+		unsigned int x, unsigned int y,
+		unsigned int width, unsigned int height,
 		uint8_t color)
 {
 	uint32_t curbe_buffer, interface_descriptor;
@@ -61,8 +61,7 @@ gen7_media_fillfunc(struct intel_batchbuffer *batch,
 
 	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
 	interface_descriptor = gen7_fill_interface_descriptor(batch, dst,
-							      media_kernel,
-							      sizeof(media_kernel));
+					media_kernel, sizeof(media_kernel));
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
 	/* media pipeline */
diff --git a/lib/media_fill_gen8.c b/lib/media_fill_gen8.c
index 362abd61..d6dd7410 100644
--- a/lib/media_fill_gen8.c
+++ b/lib/media_fill_gen8.c
@@ -50,8 +50,8 @@ static const uint32_t media_kernel[][4] = {
 void
 gen8_media_fillfunc(struct intel_batchbuffer *batch,
 		struct igt_buf *dst,
-		unsigned x, unsigned y,
-		unsigned width, unsigned height,
+		unsigned int x, unsigned int y,
+		unsigned int width, unsigned int height,
 		uint8_t color)
 {
 	uint32_t curbe_buffer, interface_descriptor;
@@ -63,7 +63,8 @@ gen8_media_fillfunc(struct intel_batchbuffer *batch,
 	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
 
 	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
+	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
+					media_kernel, sizeof(media_kernel));
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
 	/* media pipeline */
diff --git a/lib/media_fill_gen9.c b/lib/media_fill_gen9.c
index d1335fe6..a9a829f2 100644
--- a/lib/media_fill_gen9.c
+++ b/lib/media_fill_gen9.c
@@ -47,8 +47,8 @@ static const uint32_t media_kernel[][4] = {
 void
 gen9_media_fillfunc(struct intel_batchbuffer *batch,
 		struct igt_buf *dst,
-		unsigned x, unsigned y,
-		unsigned width, unsigned height,
+		unsigned int x, unsigned int y,
+		unsigned int width, unsigned int height,
 		uint8_t color)
 {
 	uint32_t curbe_buffer, interface_descriptor;
@@ -60,7 +60,8 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
 	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
 
 	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
+	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
+					media_kernel, sizeof(media_kernel));
 	assert(batch->ptr < &batch->buffer[4095]);
 
 	/* media pipeline */
-- 
2.14.3

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [igt-dev] [PATCH i-g-t v2] lib/gen6_render: Refactoring lib
  2018-04-09 15:42 [igt-dev] [PATCH i-g-t] lib/gen6_render: Refactoring lib Lukasz Kalamarz
                   ` (7 preceding siblings ...)
  2018-04-10 10:48 ` [igt-dev] [PATCH i-g-t v4 4/4] lib: Adjust refactored gpu_fill library to our coding style Katarzyna Dec
@ 2018-04-10 11:28 ` Lukasz Kalamarz
  2018-04-10 13:28   ` Katarzyna Dec
                     ` (4 more replies)
  2018-04-10 11:34 ` [igt-dev] [PATCH i-g-t v5 0/4] Refactoring of *_fill libraries Katarzyna Dec
                   ` (6 subsequent siblings)
  15 siblings, 5 replies; 28+ messages in thread
From: Lukasz Kalamarz @ 2018-04-10 11:28 UTC (permalink / raw)
  To: igt-dev

This patch starts a series of refactoring changes for the *render*
libs. A lot of the code in those libraries is copy/pasted and renamed
for different gens.

Changes made in this patch:
- remove duplicated register definitions
- move field definitions next to their register definition
- sort register definitions into ascending order
- unify spacing between a register name and its address/value

v2:
- Fix warnings from check_patch script.

Signed-off-by: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
Cc: Katarzyna Dec <katarzyna.dec@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 lib/gen6_render.h | 1600 ++++++++++++++++++++++-------------------------------
 1 file changed, 668 insertions(+), 932 deletions(-)

diff --git a/lib/gen6_render.h b/lib/gen6_render.h
index 8a4ec53..82e3f92 100644
--- a/lib/gen6_render.h
+++ b/lib/gen6_render.h
@@ -9,23 +9,94 @@
 					   ((Subopcode) << 16))
 
 #define GEN6_STATE_BASE_ADDRESS			GEN6_3D(0, 1, 1)
-#define GEN6_STATE_SIP				GEN6_3D(0, 1, 2)
+# define BUFFER_SIZE_MODIFY			       (1 << 0)
+# define BASE_ADDRESS_MODIFY			       (1 << 0)
 
+#define GEN6_STATE_SIP				GEN6_3D(0, 1, 2)
 #define GEN6_PIPELINE_SELECT			GEN6_3D(1, 1, 4)
+# define PIPELINE_SELECT_3D				0
+# define PIPELINE_SELECT_MEDIA				1
 
 #define GEN6_MEDIA_STATE_POINTERS		GEN6_3D(2, 0, 0)
 #define GEN6_MEDIA_OBJECT			GEN6_3D(2, 1, 0)
 
 #define GEN6_3DSTATE_BINDING_TABLE_POINTERS	GEN6_3D(3, 0, 1)
-# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS  (1 << 12)/* for GEN6 */
-# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS  (1 << 9) /* for GEN6 */
-# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS  (1 << 8) /* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS          (1 << 12)/* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS          (1 << 9) /* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS          (1 << 8) /* for GEN6 */
+
+#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS	GEN6_3D(3, 0, 2)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS	       (1 << 12)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS	       (1 << 9)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS	       (1 << 8)
+
+#define GEN6_3DSTATE_URB			GEN6_3D(3, 0, 5)
+/* DW1 */
+# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT		16
+# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT	0
+/* DW2 */
+# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT	8
+# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT		0
 
 #define GEN6_3DSTATE_VERTEX_BUFFERS		GEN6_3D(3, 0, 8)
 #define GEN6_3DSTATE_VERTEX_ELEMENTS		GEN6_3D(3, 0, 9)
 #define GEN6_3DSTATE_INDEX_BUFFER		GEN6_3D(3, 0, 0xa)
 #define GEN6_3DSTATE_VF_STATISTICS		GEN6_3D(1, 0, 0xb)
 
+#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS	GEN6_3D(3, 0, 0xd)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC	       (1 << 12)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF	       (1 << 11)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP       (1 << 10)
+
+#define GEN6_3DSTATE_CC_STATE_POINTERS		GEN6_3D(3, 0, 0xe)
+#define GEN6_3DSTATE_VS				GEN6_3D(3, 0, 0x10)
+#define GEN6_3DSTATE_GS				GEN6_3D(3, 0, 0x11)
+/* DW4 */
+# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT	0
+
+#define GEN6_3DSTATE_CLIP			GEN6_3D(3, 0, 0x12)
+#define GEN6_3DSTATE_SF				GEN6_3D(3, 0, 0x13)
+/* DW1 */
+# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT			22
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT		11
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT		4
+/* DW2 */
+/* DW3 */
+# define GEN6_3DSTATE_SF_CULL_BOTH				(0 << 29)
+# define GEN6_3DSTATE_SF_CULL_NONE				(1 << 29)
+# define GEN6_3DSTATE_SF_CULL_FRONT				(2 << 29)
+# define GEN6_3DSTATE_SF_CULL_BACK				(3 << 29)
+/* DW4 */
+# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT			29
+# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT			27
+# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT			25
+# define GEN6_3DSTATE_SF_VERTEX_SUB_PIXEL_PRECISION_SHIFT	12
+
+#define GEN6_3DSTATE_WM				GEN6_3D(3, 0, 0x14)
+/* DW2 */
+# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT			27
+# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
+/* DW4 */
+# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT		16
+/* DW5 */
+# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT			25
+# define GEN6_3DSTATE_WM_DISPATCH_ENABLE			(1 << 19)
+# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE			(1 << 1)
+# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE			(1 << 0)
+/* DW6 */
+# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT			20
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC	(1 << 15)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 14)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC	(1 << 13)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC		(1 << 12)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 11)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC		(1 << 10)
+
+#define GEN6_3DSTATE_CONSTANT_VS		GEN6_3D(3, 0, 0x15)
+#define GEN6_3DSTATE_CONSTANT_GS		GEN6_3D(3, 0, 0x16)
+#define GEN6_3DSTATE_CONSTANT_PS		GEN6_3D(3, 0, 0x17)
+#define GEN6_3DSTATE_SAMPLE_MASK		GEN6_3D(3, 0, 0x18)
+
 #define GEN6_3DSTATE_DRAWING_RECTANGLE		GEN6_3D(3, 1, 0)
 #define GEN6_3DSTATE_CONSTANT_COLOR		GEN6_3D(3, 1, 1)
 #define GEN6_3DSTATE_SAMPLER_PALETTE_LOAD	GEN6_3D(3, 1, 2)
@@ -34,93 +105,14 @@
 # define GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT	29
 # define GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT	18
 
-#define GEN6_3DSTATE_POLY_STIPPLE_OFFSET		GEN6_3D(3, 1, 6)
+#define GEN6_3DSTATE_POLY_STIPPLE_OFFSET	GEN6_3D(3, 1, 6)
 #define GEN6_3DSTATE_POLY_STIPPLE_PATTERN	GEN6_3D(3, 1, 7)
 #define GEN6_3DSTATE_LINE_STIPPLE		GEN6_3D(3, 1, 8)
 #define GEN6_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP	GEN6_3D(3, 1, 9)
 /* These two are BLC and CTG only, not BW or CL */
 #define GEN6_3DSTATE_AA_LINE_PARAMS		GEN6_3D(3, 1, 0xa)
 #define GEN6_3DSTATE_GS_SVB_INDEX		GEN6_3D(3, 1, 0xb)
-#define GEN6_3DSTATE_MONOFILTER_SIZE		GEN6_3D(3, 1, 0x11)
-#define GEN6_3DPRIMITIVE				GEN6_3D(3, 3, 0)
-
-#define GEN6_3DSTATE_CLEAR_PARAMS		GEN6_3D(3, 1, 0x10)
-/* DW1 */
-# define GEN6_3DSTATE_DEPTH_CLEAR_VALID		(1 << 15)
-
-#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS	GEN6_3D(3, 0, 0x02)
-# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS	(1 << 12)
-# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS	(1 << 9)
-# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS	(1 << 8)
-
-#define GEN6_3DSTATE_URB			GEN6_3D(3, 0, 0x05)
-/* DW1 */
-# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT		16
-# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT	0
-/* DW2 */
-# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT	8
-# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT		0
-
-#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS	GEN6_3D(3, 0, 0x0d)
-# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC		(1 << 12)
-# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF		(1 << 11)
-# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP	(1 << 10)
-
-#define GEN6_3DSTATE_CC_STATE_POINTERS		GEN6_3D(3, 0, 0x0e)
-
-#define GEN6_3DSTATE_VS				GEN6_3D(3, 0, 0x10)
-
-#define GEN6_3DSTATE_GS				GEN6_3D(3, 0, 0x11)
-/* DW4 */
-# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT	0
-
-#define GEN6_3DSTATE_CLIP			GEN6_3D(3, 0, 0x12)
-
-#define GEN6_3DSTATE_SF				GEN6_3D(3, 0, 0x13)
-/* DW1 */
-# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT		22
-# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT	11
-# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT	4
-/* DW2 */
-/* DW3 */
-# define GEN6_3DSTATE_SF_CULL_BOTH			(0 << 29)
-# define GEN6_3DSTATE_SF_CULL_NONE			(1 << 29)
-# define GEN6_3DSTATE_SF_CULL_FRONT			(2 << 29)
-# define GEN6_3DSTATE_SF_CULL_BACK			(3 << 29)
-/* DW4 */
-# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT		29
-# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT		27
-# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT		25
-# define GEN6_3DSTATE_SF_VERTEX_SUB_PIXEL_PRECISION_SHIFT 12
-
-#define GEN6_3DSTATE_WM				GEN6_3D(3, 0, 0x14)
-/* DW2 */
-# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT			27
-# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
-/* DW4 */
-# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT		16
-/* DW5 */
-# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT			25
-# define GEN6_3DSTATE_WM_DISPATCH_ENABLE			(1 << 19)
-# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE			(1 << 1)
-# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE			(1 << 0)
-/* DW6 */
-# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT			20
-# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC	(1 << 15)
-# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 14)
-# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC	(1 << 13)
-# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC		(1 << 12)
-# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 11)
-# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC		(1 << 10)
-
-
-#define GEN6_3DSTATE_CONSTANT_VS		GEN6_3D(3, 0, 0x15)
-#define GEN6_3DSTATE_CONSTANT_GS		GEN6_3D(3, 0, 0x16)
-#define GEN6_3DSTATE_CONSTANT_PS		GEN6_3D(3, 0, 0x17)
-
-#define GEN6_3DSTATE_SAMPLE_MASK		GEN6_3D(3, 0, 0x18)
-
-#define GEN6_3DSTATE_MULTISAMPLE		GEN6_3D(3, 1, 0x0d)
+#define GEN6_3DSTATE_MULTISAMPLE		GEN6_3D(3, 1, 0xd)
 /* DW1 */
 # define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER		(0 << 4)
 # define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT	(1 << 4)
@@ -128,648 +120,394 @@
 # define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4			(2 << 1)
 # define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8			(3 << 1)
 
-#define PIPELINE_SELECT_3D		0
-#define PIPELINE_SELECT_MEDIA		1
+#define GEN6_3DSTATE_CLEAR_PARAMS		GEN6_3D(3, 1, 0x10)
+/* DW1 */
+# define GEN6_3DSTATE_DEPTH_CLEAR_VALID		(1 << 15)
 
-/* for GEN6_STATE_BASE_ADDRESS */
-#define BASE_ADDRESS_MODIFY		(1 << 0)
-
-/* for GEN6_PIPE_CONTROL */
-#define GEN6_PIPE_CONTROL_NOWRITE       (0 << 14)
-#define GEN6_PIPE_CONTROL_WRITE_QWORD   (1 << 14)
-#define GEN6_PIPE_CONTROL_WRITE_DEPTH   (2 << 14)
-#define GEN6_PIPE_CONTROL_WRITE_TIME    (3 << 14)
-#define GEN6_PIPE_CONTROL_DEPTH_STALL   (1 << 13)
-#define GEN6_PIPE_CONTROL_WC_FLUSH      (1 << 12)
-#define GEN6_PIPE_CONTROL_IS_FLUSH      (1 << 11)
-#define GEN6_PIPE_CONTROL_TC_FLUSH      (1 << 10)
-#define GEN6_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
-#define GEN6_PIPE_CONTROL_GLOBAL_GTT    (1 << 2)
-#define GEN6_PIPE_CONTROL_LOCAL_PGTT    (0 << 2)
-#define GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH	(1 << 0)
-
-/* VERTEX_BUFFER_STATE Structure */
-#define VB0_BUFFER_INDEX_SHIFT	26
-#define VB0_VERTEXDATA			(0 << 20)
-#define VB0_INSTANCEDATA		(1 << 20)
-#define VB0_BUFFER_PITCH_SHIFT		0
-#define VB0_NULL_VERTEX_BUFFER          (1 << 13)
-
-/* VERTEX_ELEMENT_STATE Structure */
-#define VE0_VERTEX_BUFFER_INDEX_SHIFT	26 /* for GEN6 */
-#define VE0_VALID			(1 << 25) /* for GEN6 */
-#define VE0_FORMAT_SHIFT		16
-#define VE0_OFFSET_SHIFT		0
-#define VE1_VFCOMPONENT_0_SHIFT		28
-#define VE1_VFCOMPONENT_1_SHIFT		24
-#define VE1_VFCOMPONENT_2_SHIFT		20
-#define VE1_VFCOMPONENT_3_SHIFT		16
-#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT	0
-
-/* 3DPRIMITIVE bits */
-#define GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
-#define GEN6_3DPRIMITIVE_VERTEX_RANDOM	  (1 << 15)
-/* Primitive types are in gen6_defines.h */
-#define GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT	  10
-
-#define GEN6_SVG_CTL		       0x7400
-
-#define GEN6_SVG_CTL_GS_BA	       (0 << 8)
-#define GEN6_SVG_CTL_SS_BA	       (1 << 8)
-#define GEN6_SVG_CTL_IO_BA	       (2 << 8)
-#define GEN6_SVG_CTL_GS_AUB	       (3 << 8)
-#define GEN6_SVG_CTL_IO_AUB	       (4 << 8)
-#define GEN6_SVG_CTL_SIP		       (5 << 8)
-
-#define GEN6_SVG_RDATA		       0x7404
-#define GEN6_SVG_WORK_CTL	       0x7408
-
-#define GEN6_VF_CTL		       0x7500
-
-#define GEN6_VF_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
-#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID	   (0 << 8)
-#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG	   (1 << 8)
-#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE   (0 << 4)
-#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX	   (1 << 4)
-#define GEN6_VF_CTL_SKIP_INITIAL_PRIMITIVES	   (1 << 3)
-#define GEN6_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE	   (1 << 2)
-#define GEN6_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE	   (1 << 1)
-#define GEN6_VF_CTL_SNAPSHOT_ENABLE	     	   (1 << 0)
-
-#define GEN6_VF_STRG_VAL		       0x7504
-#define GEN6_VF_STR_VL_OVR	       0x7508
-#define GEN6_VF_VC_OVR		       0x750c
-#define GEN6_VF_STR_PSKIP	       0x7510
-#define GEN6_VF_MAX_PRIM		       0x7514
-#define GEN6_VF_RDATA		       0x7518
-
-#define GEN6_VS_CTL		       0x7600
-#define GEN6_VS_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
-#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_0	   (0 << 8)
-#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_1	   (1 << 8)
-#define GEN6_VS_CTL_SNAPSHOT_MUX_VALID_COUNT	   (2 << 8)
-#define GEN6_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER  (3 << 8)
-#define GEN6_VS_CTL_SNAPSHOT_ALL_THREADS		   (1 << 2)
-#define GEN6_VS_CTL_THREAD_SNAPSHOT_ENABLE	   (1 << 1)
-#define GEN6_VS_CTL_SNAPSHOT_ENABLE		   (1 << 0)
-
-#define GEN6_VS_STRG_VAL		       0x7604
-#define GEN6_VS_RDATA		       0x7608
-
-#define GEN6_SF_CTL		       0x7b00
-#define GEN6_SF_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID	   (0 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID	   (2 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID	   (4 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT	   (6 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER  (7 << 8)
-#define GEN6_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE  (1 << 4)
-#define GEN6_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE	   (1 << 3)
-#define GEN6_SF_CTL_SNAPSHOT_ALL_THREADS		   (1 << 2)
-#define GEN6_SF_CTL_THREAD_SNAPSHOT_ENABLE	   (1 << 1)
-#define GEN6_SF_CTL_SNAPSHOT_ENABLE		   (1 << 0)
-
-#define GEN6_SF_STRG_VAL		       0x7b04
-#define GEN6_SF_RDATA		       0x7b18
-
-#define GEN6_WIZ_CTL		       0x7c00
-#define GEN6_WIZ_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
-#define GEN6_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT	   16
-#define GEN6_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER   (0 << 8)
-#define GEN6_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE     (1 << 8)
-#define GEN6_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE   (2 << 8)
-#define GEN6_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH	      (1 << 6)
-#define GEN6_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS    (1 << 5)
-#define GEN6_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE   (1 << 4)
-#define GEN6_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG	      (1 << 3)
-#define GEN6_WIZ_CTL_SNAPSHOT_ALL_THREADS	      (1 << 2)
-#define GEN6_WIZ_CTL_THREAD_SNAPSHOT_ENABLE	      (1 << 1)
-#define GEN6_WIZ_CTL_SNAPSHOT_ENABLE		      (1 << 0)
-
-#define GEN6_WIZ_STRG_VAL			      0x7c04
-#define GEN6_WIZ_RDATA				      0x7c18
-
-#define GEN6_TS_CTL		       0x7e00
-#define GEN6_TS_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
-#define GEN6_TS_CTL_SNAPSHOT_MESSAGE_ERROR	   (0 << 8)
-#define GEN6_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR   (3 << 8)
-#define GEN6_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS	   (1 << 2)
-#define GEN6_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS  	   (1 << 1)
-#define GEN6_TS_CTL_SNAPSHOT_ENABLE		   (1 << 0)
-
-#define GEN6_TS_STRG_VAL		       0x7e04
-#define GEN6_TS_RDATA		       0x7e08
-
-#define GEN6_TD_CTL_MUX_SHIFT	       8
-#define GEN6_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH	   (1 << 7)
-#define GEN6_TD_CTL_FORCE_EXTERNAL_HALT		   (1 << 6)
-#define GEN6_TD_CTL_EXCEPTION_MASK_OVERRIDE	   (1 << 5)
-#define GEN6_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE  (1 << 4)
-#define GEN6_TD_CTL_BREAKPOINT_ENABLE		   (1 << 2)
-#define GEN6_TD_CTL2		       0x8004
-#define GEN6_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
-#define GEN6_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE      (1 << 26)
-#define GEN6_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE	      (1 << 25)
-#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT	      16
-#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE	      (1 << 8)
-#define GEN6_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
-#define GEN6_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE	      (1 << 6)
-#define GEN6_TD_CTL2_SF_EXECUTION_MASK_ENABLE	      (1 << 5)
-#define GEN6_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE     (1 << 4)
-#define GEN6_TD_CTL2_GS_EXECUTION_MASK_ENABLE	      (1 << 3)
-#define GEN6_TD_CTL2_VS_EXECUTION_MASK_ENABLE	      (1 << 0)
-#define GEN6_TD_VF_VS_EMSK	       0x8008
-#define GEN6_TD_GS_EMSK		       0x800c
-#define GEN6_TD_CLIP_EMSK	       0x8010
-#define GEN6_TD_SF_EMSK		       0x8014
-#define GEN6_TD_WIZ_EMSK		       0x8018
-#define GEN6_TD_0_6_EHTRG_VAL	       0x801c
-#define GEN6_TD_0_7_EHTRG_VAL	       0x8020
-#define GEN6_TD_0_6_EHTRG_MSK           0x8024
-#define GEN6_TD_0_7_EHTRG_MSK	       0x8028
-#define GEN6_TD_RDATA		       0x802c
-#define GEN6_TD_TS_EMSK		       0x8030
-
-#define GEN6_EU_CTL		       0x8800
-#define GEN6_EU_CTL_SELECT_SHIFT	       16
-#define GEN6_EU_CTL_DATA_MUX_SHIFT      8
-#define GEN6_EU_ATT_0		       0x8810
-#define GEN6_EU_ATT_1		       0x8814
-#define GEN6_EU_ATT_DATA_0	       0x8820
-#define GEN6_EU_ATT_DATA_1	       0x8824
-#define GEN6_EU_ATT_CLR_0	       0x8830
-#define GEN6_EU_ATT_CLR_1	       0x8834
-#define GEN6_EU_RDATA		       0x8840
+#define GEN6_3DSTATE_MONOFILTER_SIZE		GEN6_3D(3, 1, 0x11)
 
 #define GEN6_PIPE_CONTROL			GEN6_3D(3, 2, 0)
+# define GEN6_PIPE_CONTROL_NOWRITE		(0 << 14)
+# define GEN6_PIPE_CONTROL_WRITE_QWORD		(1 << 14)
+# define GEN6_PIPE_CONTROL_WRITE_DEPTH		(2 << 14)
+# define GEN6_PIPE_CONTROL_WRITE_TIME		(3 << 14)
+# define GEN6_PIPE_CONTROL_DEPTH_STALL		(1 << 13)
+# define GEN6_PIPE_CONTROL_WC_FLUSH		(1 << 12)
+# define GEN6_PIPE_CONTROL_IS_FLUSH		(1 << 11)
+# define GEN6_PIPE_CONTROL_TC_FLUSH		(1 << 10)
+# define GEN6_PIPE_CONTROL_NOTIFY_ENABLE	(1 << 8)
+# define GEN6_PIPE_CONTROL_GLOBAL_GTT		(1 << 2)
+# define GEN6_PIPE_CONTROL_LOCAL_PGTT		(0 << 2)
+# define GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH	(1 << 0)
 
-#define GEN6_3DPRIMITIVE				GEN6_3D(3, 3, 0)
+#define GEN6_3DPRIMITIVE			GEN6_3D(3, 3, 0)
+# define GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL	(0 << 15)
+# define GEN6_3DPRIMITIVE_VERTEX_RANDOM		(1 << 15)
+/* Primitive types are in gen6_defines.h */
+# define GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT	10
 
-#define GEN6_3DSTATE_CLEAR_PARAMS		GEN6_3D(3, 1, 0x10)
-/* DW1 */
-# define GEN6_3DSTATE_DEPTH_CLEAR_VALID		(1 << 15)
+/* VERTEX_BUFFER_STATE Structure */
+# define VB0_BUFFER_INDEX_SHIFT	26
+# define VB0_VERTEXDATA				(0 << 20)
+# define VB0_INSTANCEDATA			(1 << 20)
+# define VB0_BUFFER_PITCH_SHIFT			0
+# define VB0_NULL_VERTEX_BUFFER			(1 << 13)
 
-/* for GEN6+ */
-#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS	GEN6_3D(3, 0, 0x02)
-# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS	(1 << 12)
-# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS	(1 << 9)
-# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS	(1 << 8)
+/* VERTEX_ELEMENT_STATE Structure */
+# define VE0_VERTEX_BUFFER_INDEX_SHIFT		26 /* for GEN6 */
+# define VE0_VALID				(1 << 25) /* for GEN6 */
+# define VE0_FORMAT_SHIFT			16
+# define VE0_OFFSET_SHIFT			0
+# define VE1_VFCOMPONENT_0_SHIFT		28
+# define VE1_VFCOMPONENT_1_SHIFT		24
+# define VE1_VFCOMPONENT_2_SHIFT		20
+# define VE1_VFCOMPONENT_3_SHIFT		16
+# define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT	0
 
-#define GEN6_3DSTATE_URB			GEN6_3D(3, 0, 0x05)
-/* DW1 */
-# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT		16
-# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT	0
-/* DW2 */
-# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT	8
-# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT		0
+#define GEN6_SVG_CTL				0x7400
+# define GEN6_SVG_CTL_GS_BA			(0 << 8)
+# define GEN6_SVG_CTL_SS_BA			(1 << 8)
+# define GEN6_SVG_CTL_IO_BA			(2 << 8)
+# define GEN6_SVG_CTL_GS_AUB			(3 << 8)
+# define GEN6_SVG_CTL_IO_AUB			(4 << 8)
+# define GEN6_SVG_CTL_SIP			(5 << 8)
 
-#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS	GEN6_3D(3, 0, 0x0d)
-# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC		(1 << 12)
-# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF		(1 << 11)
-# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP	(1 << 10)
+#define GEN6_SVG_RDATA				0x7404
+#define GEN6_SVG_WORK_CTL			0x7408
+#define GEN6_VF_CTL				0x7500
+# define GEN6_VF_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
+# define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID  (0 << 8)
+# define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG  (1 << 8)
+# define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
+# define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX	   (1 << 4)
+# define GEN6_VF_CTL_SKIP_INITIAL_PRIMITIVES	   (1 << 3)
+# define GEN6_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE   (1 << 2)
+# define GEN6_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE	   (1 << 1)
+# define GEN6_VF_CTL_SNAPSHOT_ENABLE		   (1 << 0)
 
-#define GEN6_3DSTATE_CC_STATE_POINTERS		GEN6_3D(3, 0, 0x0e)
+#define GEN6_VF_STRG_VAL			0x7504
+#define GEN6_VF_STR_VL_OVR			0x7508
+#define GEN6_VF_VC_OVR				0x750c
+#define GEN6_VF_STR_PSKIP			0x7510
+#define GEN6_VF_MAX_PRIM			0x7514
+#define GEN6_VF_RDATA				0x7518
+#define GEN6_VS_CTL				0x7600
+# define GEN6_VS_CTL_SNAPSHOT_COMPLETE		    (1 << 31)
+# define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_0	    (0 << 8)
+# define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_1	    (1 << 8)
+# define GEN6_VS_CTL_SNAPSHOT_MUX_VALID_COUNT	    (2 << 8)
+# define GEN6_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
+# define GEN6_VS_CTL_SNAPSHOT_ALL_THREADS	    (1 << 2)
+# define GEN6_VS_CTL_THREAD_SNAPSHOT_ENABLE	    (1 << 1)
+# define GEN6_VS_CTL_SNAPSHOT_ENABLE		    (1 << 0)
 
-#define GEN6_3DSTATE_VS				GEN6_3D(3, 0, 0x10)
+#define GEN6_VS_STRG_VAL			0x7604
+#define GEN6_VS_RDATA				0x7608
+#define GEN6_SF_CTL				0x7b00
+# define GEN6_SF_CTL_SNAPSHOT_COMPLETE			(1 << 31)
+# define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID	(0 << 8)
+# define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT	(1 << 8)
+# define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID	(2 << 8)
+# define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT	(3 << 8)
+# define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID	(4 << 8)
+# define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT	(5 << 8)
+# define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT		(6 << 8)
+# define GEN6_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER	(7 << 8)
+# define GEN6_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE	(1 << 4)
+# define GEN6_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE	(1 << 3)
+# define GEN6_SF_CTL_SNAPSHOT_ALL_THREADS		(1 << 2)
+# define GEN6_SF_CTL_THREAD_SNAPSHOT_ENABLE		(1 << 1)
+# define GEN6_SF_CTL_SNAPSHOT_ENABLE			(1 << 0)
 
-#define GEN6_3DSTATE_GS				GEN6_3D(3, 0, 0x11)
-/* DW4 */
-# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT	0
+#define GEN6_SF_STRG_VAL			0x7b04
+#define GEN6_SF_RDATA				0x7b18
+#define GEN6_WIZ_CTL				0x7c00
+# define GEN6_WIZ_CTL_SNAPSHOT_COMPLETE			(1 << 31)
+# define GEN6_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT		16
+# define GEN6_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER   (0 << 8)
+# define GEN6_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE     (1 << 8)
+# define GEN6_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE   (2 << 8)
+# define GEN6_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH		(1 << 6)
+# define GEN6_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS    (1 << 5)
+# define GEN6_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE   (1 << 4)
+# define GEN6_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG	(1 << 3)
+# define GEN6_WIZ_CTL_SNAPSHOT_ALL_THREADS		(1 << 2)
+# define GEN6_WIZ_CTL_THREAD_SNAPSHOT_ENABLE		(1 << 1)
+# define GEN6_WIZ_CTL_SNAPSHOT_ENABLE			(1 << 0)
 
-#define GEN6_3DSTATE_CLIP			GEN6_3D(3, 0, 0x12)
+#define GEN6_WIZ_STRG_VAL			0x7c04
+#define GEN6_WIZ_RDATA				0x7c18
+#define GEN6_TS_CTL				0x7e00
+# define GEN6_TS_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
+# define GEN6_TS_CTL_SNAPSHOT_MESSAGE_ERROR	   (0 << 8)
+# define GEN6_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
+# define GEN6_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS	   (1 << 2)
+# define GEN6_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS	   (1 << 1)
+# define GEN6_TS_CTL_SNAPSHOT_ENABLE		   (1 << 0)
 
-#define GEN6_3DSTATE_SF				GEN6_3D(3, 0, 0x13)
-/* DW1 */
-# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT		22
-# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT	11
-# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT	4
-/* DW2 */
-/* DW3 */
-# define GEN6_3DSTATE_SF_CULL_BOTH			(0 << 29)
-# define GEN6_3DSTATE_SF_CULL_NONE			(1 << 29)
-# define GEN6_3DSTATE_SF_CULL_FRONT			(2 << 29)
-# define GEN6_3DSTATE_SF_CULL_BACK			(3 << 29)
-/* DW4 */
-# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT		29
-# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT		27
-# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT		25
+#define GEN6_TS_STRG_VAL			0x7e04
+#define GEN6_TS_RDATA				0x7e08
 
+# define GEN6_TD_CTL_MUX_SHIFT				8
+# define GEN6_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH	(1 << 7)
+# define GEN6_TD_CTL_FORCE_EXTERNAL_HALT		(1 << 6)
+# define GEN6_TD_CTL_EXCEPTION_MASK_OVERRIDE		(1 << 5)
+# define GEN6_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE	(1 << 4)
+# define GEN6_TD_CTL_BREAKPOINT_ENABLE			(1 << 2)
 
-#define GEN6_3DSTATE_WM				GEN6_3D(3, 0, 0x14)
-/* DW2 */
-# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT			27
-# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
-/* DW4 */
-# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT		16
-/* DW5 */
-# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT			25
-# define GEN6_3DSTATE_WM_DISPATCH_ENABLE			(1 << 19)
-# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE			(1 << 1)
-# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE			(1 << 0)
-/* DW6 */
-# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT			20
-# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC	(1 << 15)
-# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 14)
-# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC	(1 << 13)
-# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC		(1 << 12)
-# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 11)
-# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC		(1 << 10)
+# define GEN6_TD_CTL2				0x8004
+# define GEN6_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE	    (1 << 28)
+# define GEN6_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE	    (1 << 26)
+# define GEN6_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE	    (1 << 25)
+# define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT		    16
+# define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE	    (1 << 8)
+# define GEN6_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE  (1 << 7)
+# define GEN6_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE		    (1 << 6)
+# define GEN6_TD_CTL2_SF_EXECUTION_MASK_ENABLE		    (1 << 5)
+# define GEN6_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE	    (1 << 4)
+# define GEN6_TD_CTL2_GS_EXECUTION_MASK_ENABLE		    (1 << 3)
+# define GEN6_TD_CTL2_VS_EXECUTION_MASK_ENABLE		    (1 << 0)
 
+#define GEN6_TD_VF_VS_EMSK			0x8008
+#define GEN6_TD_GS_EMSK				0x800c
+#define GEN6_TD_CLIP_EMSK			0x8010
+#define GEN6_TD_SF_EMSK				0x8014
+#define GEN6_TD_WIZ_EMSK			0x8018
+#define GEN6_TD_0_6_EHTRG_VAL			0x801c
+#define GEN6_TD_0_7_EHTRG_VAL			0x8020
+#define GEN6_TD_0_6_EHTRG_MSK			0x8024
+#define GEN6_TD_0_7_EHTRG_MSK			0x8028
+#define GEN6_TD_RDATA				0x802c
+#define GEN6_TD_TS_EMSK				0x8030
+#define GEN6_EU_CTL				0x8800
+# define GEN6_EU_CTL_SELECT_SHIFT		16
+# define GEN6_EU_CTL_DATA_MUX_SHIFT		8
 
-#define GEN6_3DSTATE_CONSTANT_VS		GEN6_3D(3, 0, 0x15)
-#define GEN6_3DSTATE_CONSTANT_GS          	GEN6_3D(3, 0, 0x16)
-#define GEN6_3DSTATE_CONSTANT_PS          	GEN6_3D(3, 0, 0x17)
+#define GEN6_EU_ATT_0				0x8810
+#define GEN6_EU_ATT_1				0x8814
+#define GEN6_EU_ATT_DATA_0			0x8820
+#define GEN6_EU_ATT_DATA_1			0x8824
+#define GEN6_EU_ATT_CLR_0			0x8830
+#define GEN6_EU_ATT_CLR_1			0x8834
+#define GEN6_EU_RDATA				0x8840
 
-#define GEN6_3DSTATE_SAMPLE_MASK		GEN6_3D(3, 0, 0x18)
-
-#define GEN6_3DSTATE_MULTISAMPLE		GEN6_3D(3, 1, 0x0d)
-/* DW1 */
-# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER		(0 << 4)
-# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT	(1 << 4)
-# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1			(0 << 1)
-# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4			(2 << 1)
-# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8			(3 << 1)
-
-#define PIPELINE_SELECT_3D		0
-#define PIPELINE_SELECT_MEDIA		1
-
-#define UF0_CS_REALLOC			(1 << 13)
-#define UF0_VFE_REALLOC			(1 << 12)
-#define UF0_SF_REALLOC			(1 << 11)
-#define UF0_CLIP_REALLOC		(1 << 10)
-#define UF0_GS_REALLOC			(1 << 9)
-#define UF0_VS_REALLOC			(1 << 8)
-#define UF1_CLIP_FENCE_SHIFT		20
-#define UF1_GS_FENCE_SHIFT		10
-#define UF1_VS_FENCE_SHIFT		0
-#define UF2_CS_FENCE_SHIFT		20
-#define UF2_VFE_FENCE_SHIFT		10
-#define UF2_SF_FENCE_SHIFT		0
-
-/* for GEN6_STATE_BASE_ADDRESS */
-#define BASE_ADDRESS_MODIFY		(1 << 0)
-#define BUFFER_SIZE_MODIFY		(1 << 0)
+#define UF0_CS_REALLOC				(1 << 13)
+#define UF0_VFE_REALLOC				(1 << 12)
+#define UF0_SF_REALLOC				(1 << 11)
+#define UF0_CLIP_REALLOC			(1 << 10)
+#define UF0_GS_REALLOC				(1 << 9)
+#define UF0_VS_REALLOC				(1 << 8)
+#define UF1_CLIP_FENCE_SHIFT			20
+#define UF1_GS_FENCE_SHIFT			10
+#define UF1_VS_FENCE_SHIFT			0
+#define UF2_CS_FENCE_SHIFT			20
+#define UF2_VFE_FENCE_SHIFT			10
+#define UF2_SF_FENCE_SHIFT			0
 
 /* for GEN6_3DSTATE_PIPELINED_POINTERS */
-#define GEN6_GS_DISABLE		       0
-#define GEN6_GS_ENABLE		       1
-#define GEN6_CLIP_DISABLE	       0
-#define GEN6_CLIP_ENABLE		       1
+#define GEN6_GS_DISABLE				0
+#define GEN6_GS_ENABLE				1
+#define GEN6_CLIP_DISABLE			0
+#define GEN6_CLIP_ENABLE			1
 
-/* for GEN6_PIPE_CONTROL */
-#define GEN6_PIPE_CONTROL_NOWRITE       (0 << 14)
-#define GEN6_PIPE_CONTROL_WRITE_QWORD   (1 << 14)
-#define GEN6_PIPE_CONTROL_WRITE_DEPTH   (2 << 14)
-#define GEN6_PIPE_CONTROL_WRITE_TIME    (3 << 14)
-#define GEN6_PIPE_CONTROL_DEPTH_STALL   (1 << 13)
-#define GEN6_PIPE_CONTROL_WC_FLUSH      (1 << 12)
-#define GEN6_PIPE_CONTROL_IS_FLUSH      (1 << 11)
-#define GEN6_PIPE_CONTROL_TC_FLUSH      (1 << 10)
-#define GEN6_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
-#define GEN6_PIPE_CONTROL_GLOBAL_GTT    (1 << 2)
-#define GEN6_PIPE_CONTROL_LOCAL_PGTT    (0 << 2)
-#define GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH	(1 << 0)
+/* 3D state */
+#define _3DOP_3DSTATE_PIPELINED			0x0
+#define _3DOP_3DSTATE_NONPIPELINED		0x1
+#define _3DOP_3DCONTROL				0x2
+#define _3DOP_3DPRIMITIVE			0x3
 
-/* 3DPRIMITIVE bits */
-#define GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
-#define GEN6_3DPRIMITIVE_VERTEX_RANDOM	  (1 << 15)
-/* Primitive types are in gen6_defines.h */
-#define GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT	  10
+#define _3DSTATE_PIPELINED_POINTERS		0x00
+#define _3DSTATE_BINDING_TABLE_POINTERS		0x01
+#define _3DSTATE_VERTEX_BUFFERS			0x08
+#define _3DSTATE_VERTEX_ELEMENTS		0x09
+#define _3DSTATE_INDEX_BUFFER			0x0A
+#define _3DSTATE_VF_STATISTICS			0x0B
+#define _3DSTATE_DRAWING_RECTANGLE		0x00
+#define _3DSTATE_CONSTANT_COLOR			0x01
+#define _3DSTATE_SAMPLER_PALETTE_LOAD		0x02
+#define _3DSTATE_CHROMA_KEY			0x04
+#define _3DSTATE_DEPTH_BUFFER			0x05
+#define _3DSTATE_POLY_STIPPLE_OFFSET		0x06
+#define _3DSTATE_POLY_STIPPLE_PATTERN		0x07
+#define _3DSTATE_LINE_STIPPLE			0x08
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP	0x09
+#define _3DCONTROL				0x00
+#define _3DPRIMITIVE				0x00
 
-#define GEN6_SVG_CTL		       0x7400
+#define _3DPRIM_POINTLIST			0x01
+#define _3DPRIM_LINELIST			0x02
+#define _3DPRIM_LINESTRIP			0x03
+#define _3DPRIM_TRILIST				0x04
+#define _3DPRIM_TRISTRIP			0x05
+#define _3DPRIM_TRIFAN				0x06
+#define _3DPRIM_QUADLIST			0x07
+#define _3DPRIM_QUADSTRIP			0x08
+#define _3DPRIM_LINELIST_ADJ			0x09
+#define _3DPRIM_LINESTRIP_ADJ			0x0A
+#define _3DPRIM_TRILIST_ADJ			0x0B
+#define _3DPRIM_TRISTRIP_ADJ			0x0C
+#define _3DPRIM_TRISTRIP_REVERSE		0x0D
+#define _3DPRIM_POLYGON				0x0E
+#define _3DPRIM_RECTLIST			0x0F
+#define _3DPRIM_LINELOOP			0x10
+#define _3DPRIM_POINTLIST_BF			0x11
+#define _3DPRIM_LINESTRIP_CONT			0x12
+#define _3DPRIM_LINESTRIP_BF			0x13
+#define _3DPRIM_LINESTRIP_CONT_BF		0x14
+#define _3DPRIM_TRIFAN_NOSTIPPLE		0x15
 
-#define GEN6_SVG_CTL_GS_BA	       (0 << 8)
-#define GEN6_SVG_CTL_SS_BA	       (1 << 8)
-#define GEN6_SVG_CTL_IO_BA	       (2 << 8)
-#define GEN6_SVG_CTL_GS_AUB	       (3 << 8)
-#define GEN6_SVG_CTL_IO_AUB	       (4 << 8)
-#define GEN6_SVG_CTL_SIP		       (5 << 8)
+#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL	0
+#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM	1
 
-#define GEN6_SVG_RDATA		       0x7404
-#define GEN6_SVG_WORK_CTL	       0x7408
+#define GEN6_ANISORATIO_2			0
+#define GEN6_ANISORATIO_4			1
+#define GEN6_ANISORATIO_6			2
+#define GEN6_ANISORATIO_8			3
+#define GEN6_ANISORATIO_10			4
+#define GEN6_ANISORATIO_12			5
+#define GEN6_ANISORATIO_14			6
+#define GEN6_ANISORATIO_16			7
 
-#define GEN6_VF_CTL		       0x7500
+#define GEN6_BLENDFACTOR_ONE			0x1
+#define GEN6_BLENDFACTOR_SRC_COLOR		0x2
+#define GEN6_BLENDFACTOR_SRC_ALPHA		0x3
+#define GEN6_BLENDFACTOR_DST_ALPHA		0x4
+#define GEN6_BLENDFACTOR_DST_COLOR		0x5
+#define GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE	0x6
+#define GEN6_BLENDFACTOR_CONST_COLOR		0x7
+#define GEN6_BLENDFACTOR_CONST_ALPHA		0x8
+#define GEN6_BLENDFACTOR_SRC1_COLOR		0x9
+#define GEN6_BLENDFACTOR_SRC1_ALPHA		0x0A
+#define GEN6_BLENDFACTOR_ZERO			0x11
+#define GEN6_BLENDFACTOR_INV_SRC_COLOR		0x12
+#define GEN6_BLENDFACTOR_INV_SRC_ALPHA		0x13
+#define GEN6_BLENDFACTOR_INV_DST_ALPHA		0x14
+#define GEN6_BLENDFACTOR_INV_DST_COLOR		0x15
+#define GEN6_BLENDFACTOR_INV_CONST_COLOR	0x17
+#define GEN6_BLENDFACTOR_INV_CONST_ALPHA	0x18
+#define GEN6_BLENDFACTOR_INV_SRC1_COLOR		0x19
+#define GEN6_BLENDFACTOR_INV_SRC1_ALPHA		0x1A
 
-#define GEN6_VF_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
-#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID	   (0 << 8)
-#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG	   (1 << 8)
-#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE   (0 << 4)
-#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX	   (1 << 4)
-#define GEN6_VF_CTL_SKIP_INITIAL_PRIMITIVES	   (1 << 3)
-#define GEN6_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE	   (1 << 2)
-#define GEN6_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE	   (1 << 1)
-#define GEN6_VF_CTL_SNAPSHOT_ENABLE	     	   (1 << 0)
+#define GEN6_BLENDFUNCTION_ADD			0
+#define GEN6_BLENDFUNCTION_SUBTRACT		1
+#define GEN6_BLENDFUNCTION_REVERSE_SUBTRACT	2
+#define GEN6_BLENDFUNCTION_MIN			3
+#define GEN6_BLENDFUNCTION_MAX			4
 
-#define GEN6_VF_STRG_VAL		       0x7504
-#define GEN6_VF_STR_VL_OVR	       0x7508
-#define GEN6_VF_VC_OVR		       0x750c
-#define GEN6_VF_STR_PSKIP	       0x7510
-#define GEN6_VF_MAX_PRIM		       0x7514
-#define GEN6_VF_RDATA		       0x7518
+#define GEN6_ALPHATEST_FORMAT_UNORM8		0
+#define GEN6_ALPHATEST_FORMAT_FLOAT32		1
 
-#define GEN6_VS_CTL		       0x7600
-#define GEN6_VS_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
-#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_0	   (0 << 8)
-#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_1	   (1 << 8)
-#define GEN6_VS_CTL_SNAPSHOT_MUX_VALID_COUNT	   (2 << 8)
-#define GEN6_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER  (3 << 8)
-#define GEN6_VS_CTL_SNAPSHOT_ALL_THREADS		   (1 << 2)
-#define GEN6_VS_CTL_THREAD_SNAPSHOT_ENABLE	   (1 << 1)
-#define GEN6_VS_CTL_SNAPSHOT_ENABLE		   (1 << 0)
+#define GEN6_CHROMAKEY_KILL_ON_ANY_MATCH	0
+#define GEN6_CHROMAKEY_REPLACE_BLACK		1
 
-#define GEN6_VS_STRG_VAL		       0x7604
-#define GEN6_VS_RDATA		       0x7608
+#define GEN6_CLIP_API_OGL			0
+#define GEN6_CLIP_API_DX			1
 
-#define GEN6_SF_CTL		       0x7b00
-#define GEN6_SF_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID	   (0 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID	   (2 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID	   (4 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT	   (6 << 8)
-#define GEN6_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER  (7 << 8)
-#define GEN6_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE  (1 << 4)
-#define GEN6_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE	   (1 << 3)
-#define GEN6_SF_CTL_SNAPSHOT_ALL_THREADS		   (1 << 2)
-#define GEN6_SF_CTL_THREAD_SNAPSHOT_ENABLE	   (1 << 1)
-#define GEN6_SF_CTL_SNAPSHOT_ENABLE		   (1 << 0)
+#define GEN6_CLIPMODE_NORMAL			0
+#define GEN6_CLIPMODE_CLIP_ALL			1
+#define GEN6_CLIPMODE_CLIP_NON_REJECTED		2
+#define GEN6_CLIPMODE_REJECT_ALL		3
+#define GEN6_CLIPMODE_ACCEPT_ALL		4
 
-#define GEN6_SF_STRG_VAL		       0x7b04
-#define GEN6_SF_RDATA		       0x7b18
+#define GEN6_CLIP_NDCSPACE			0
+#define GEN6_CLIP_SCREENSPACE			1
 
-#define GEN6_WIZ_CTL		       0x7c00
-#define GEN6_WIZ_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
-#define GEN6_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT	   16
-#define GEN6_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER   (0 << 8)
-#define GEN6_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE     (1 << 8)
-#define GEN6_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE   (2 << 8)
-#define GEN6_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH	      (1 << 6)
-#define GEN6_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS    (1 << 5)
-#define GEN6_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE   (1 << 4)
-#define GEN6_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG	      (1 << 3)
-#define GEN6_WIZ_CTL_SNAPSHOT_ALL_THREADS	      (1 << 2)
-#define GEN6_WIZ_CTL_THREAD_SNAPSHOT_ENABLE	      (1 << 1)
-#define GEN6_WIZ_CTL_SNAPSHOT_ENABLE		      (1 << 0)
+#define GEN6_COMPAREFUNCTION_ALWAYS		0
+#define GEN6_COMPAREFUNCTION_NEVER		1
+#define GEN6_COMPAREFUNCTION_LESS		2
+#define GEN6_COMPAREFUNCTION_EQUAL		3
+#define GEN6_COMPAREFUNCTION_LEQUAL		4
+#define GEN6_COMPAREFUNCTION_GREATER		5
+#define GEN6_COMPAREFUNCTION_NOTEQUAL		6
+#define GEN6_COMPAREFUNCTION_GEQUAL		7
 
-#define GEN6_WIZ_STRG_VAL			      0x7c04
-#define GEN6_WIZ_RDATA				      0x7c18
+#define GEN6_COVERAGE_PIXELS_HALF		0
+#define GEN6_COVERAGE_PIXELS_1			1
+#define GEN6_COVERAGE_PIXELS_2			2
+#define GEN6_COVERAGE_PIXELS_4			3
 
-#define GEN6_TS_CTL		       0x7e00
-#define GEN6_TS_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
-#define GEN6_TS_CTL_SNAPSHOT_MESSAGE_ERROR	   (0 << 8)
-#define GEN6_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR   (3 << 8)
-#define GEN6_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS	   (1 << 2)
-#define GEN6_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS  	   (1 << 1)
-#define GEN6_TS_CTL_SNAPSHOT_ENABLE		   (1 << 0)
+#define GEN6_CULLMODE_BOTH			0
+#define GEN6_CULLMODE_NONE			1
+#define GEN6_CULLMODE_FRONT			2
+#define GEN6_CULLMODE_BACK			3
 
-#define GEN6_TS_STRG_VAL		       0x7e04
-#define GEN6_TS_RDATA		       0x7e08
+#define GEN6_DEFAULTCOLOR_R8G8B8A8_UNORM	0
+#define GEN6_DEFAULTCOLOR_R32G32B32A32_FLOAT	1
 
-#define GEN6_TD_CTL_MUX_SHIFT	       8
-#define GEN6_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH	   (1 << 7)
-#define GEN6_TD_CTL_FORCE_EXTERNAL_HALT		   (1 << 6)
-#define GEN6_TD_CTL_EXCEPTION_MASK_OVERRIDE	   (1 << 5)
-#define GEN6_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE  (1 << 4)
-#define GEN6_TD_CTL_BREAKPOINT_ENABLE		   (1 << 2)
-#define GEN6_TD_CTL2		       0x8004
-#define GEN6_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
-#define GEN6_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE      (1 << 26)
-#define GEN6_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE	      (1 << 25)
-#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT	      16
-#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE	      (1 << 8)
-#define GEN6_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
-#define GEN6_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE	      (1 << 6)
-#define GEN6_TD_CTL2_SF_EXECUTION_MASK_ENABLE	      (1 << 5)
-#define GEN6_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE     (1 << 4)
-#define GEN6_TD_CTL2_GS_EXECUTION_MASK_ENABLE	      (1 << 3)
-#define GEN6_TD_CTL2_VS_EXECUTION_MASK_ENABLE	      (1 << 0)
-#define GEN6_TD_VF_VS_EMSK	       0x8008
-#define GEN6_TD_GS_EMSK		       0x800c
-#define GEN6_TD_CLIP_EMSK	       0x8010
-#define GEN6_TD_SF_EMSK		       0x8014
-#define GEN6_TD_WIZ_EMSK		       0x8018
-#define GEN6_TD_0_6_EHTRG_VAL	       0x801c
-#define GEN6_TD_0_7_EHTRG_VAL	       0x8020
-#define GEN6_TD_0_6_EHTRG_MSK           0x8024
-#define GEN6_TD_0_7_EHTRG_MSK	       0x8028
-#define GEN6_TD_RDATA		       0x802c
-#define GEN6_TD_TS_EMSK		       0x8030
+#define GEN6_DEPTHFORMAT_D32_FLOAT_S8X24_UINT	0
+#define GEN6_DEPTHFORMAT_D32_FLOAT		1
+#define GEN6_DEPTHFORMAT_D24_UNORM_S8_UINT	2
+#define GEN6_DEPTHFORMAT_D16_UNORM		5
 
-#define GEN6_EU_CTL		       0x8800
-#define GEN6_EU_CTL_SELECT_SHIFT	       16
-#define GEN6_EU_CTL_DATA_MUX_SHIFT      8
-#define GEN6_EU_ATT_0		       0x8810
-#define GEN6_EU_ATT_1		       0x8814
-#define GEN6_EU_ATT_DATA_0	       0x8820
-#define GEN6_EU_ATT_DATA_1	       0x8824
-#define GEN6_EU_ATT_CLR_0	       0x8830
-#define GEN6_EU_ATT_CLR_1	       0x8834
-#define GEN6_EU_RDATA		       0x8840
+#define GEN6_FLOATING_POINT_IEEE_754		0
+#define GEN6_FLOATING_POINT_NON_IEEE_754	1
 
-/* 3D state:
- */
-#define _3DOP_3DSTATE_PIPELINED       0x0
-#define _3DOP_3DSTATE_NONPIPELINED    0x1
-#define _3DOP_3DCONTROL               0x2
-#define _3DOP_3DPRIMITIVE             0x3
+#define GEN6_FRONTWINDING_CW			0
+#define GEN6_FRONTWINDING_CCW			1
 
-#define _3DSTATE_PIPELINED_POINTERS       0x00
-#define _3DSTATE_BINDING_TABLE_POINTERS   0x01
-#define _3DSTATE_VERTEX_BUFFERS           0x08
-#define _3DSTATE_VERTEX_ELEMENTS          0x09
-#define _3DSTATE_INDEX_BUFFER             0x0A
-#define _3DSTATE_VF_STATISTICS            0x0B
-#define _3DSTATE_DRAWING_RECTANGLE            0x00
-#define _3DSTATE_CONSTANT_COLOR               0x01
-#define _3DSTATE_SAMPLER_PALETTE_LOAD         0x02
-#define _3DSTATE_CHROMA_KEY                   0x04
-#define _3DSTATE_DEPTH_BUFFER                 0x05
-#define _3DSTATE_POLY_STIPPLE_OFFSET          0x06
-#define _3DSTATE_POLY_STIPPLE_PATTERN         0x07
-#define _3DSTATE_LINE_STIPPLE                 0x08
-#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP    0x09
-#define _3DCONTROL    0x00
-#define _3DPRIMITIVE  0x00
+#define GEN6_INDEX_BYTE				0
+#define GEN6_INDEX_WORD				1
+#define GEN6_INDEX_DWORD			2
 
-#define _3DPRIM_POINTLIST         0x01
-#define _3DPRIM_LINELIST          0x02
-#define _3DPRIM_LINESTRIP         0x03
-#define _3DPRIM_TRILIST           0x04
-#define _3DPRIM_TRISTRIP          0x05
-#define _3DPRIM_TRIFAN            0x06
-#define _3DPRIM_QUADLIST          0x07
-#define _3DPRIM_QUADSTRIP         0x08
-#define _3DPRIM_LINELIST_ADJ      0x09
-#define _3DPRIM_LINESTRIP_ADJ     0x0A
-#define _3DPRIM_TRILIST_ADJ       0x0B
-#define _3DPRIM_TRISTRIP_ADJ      0x0C
-#define _3DPRIM_TRISTRIP_REVERSE  0x0D
-#define _3DPRIM_POLYGON           0x0E
-#define _3DPRIM_RECTLIST          0x0F
-#define _3DPRIM_LINELOOP          0x10
-#define _3DPRIM_POINTLIST_BF      0x11
-#define _3DPRIM_LINESTRIP_CONT    0x12
-#define _3DPRIM_LINESTRIP_BF      0x13
-#define _3DPRIM_LINESTRIP_CONT_BF 0x14
-#define _3DPRIM_TRIFAN_NOSTIPPLE  0x15
+#define GEN6_LOGICOPFUNCTION_CLEAR		0
+#define GEN6_LOGICOPFUNCTION_NOR		1
+#define GEN6_LOGICOPFUNCTION_AND_INVERTED	2
+#define GEN6_LOGICOPFUNCTION_COPY_INVERTED	3
+#define GEN6_LOGICOPFUNCTION_AND_REVERSE	4
+#define GEN6_LOGICOPFUNCTION_INVERT		5
+#define GEN6_LOGICOPFUNCTION_XOR		6
+#define GEN6_LOGICOPFUNCTION_NAND		7
+#define GEN6_LOGICOPFUNCTION_AND		8
+#define GEN6_LOGICOPFUNCTION_EQUIV		9
+#define GEN6_LOGICOPFUNCTION_NOOP		10
+#define GEN6_LOGICOPFUNCTION_OR_INVERTED	11
+#define GEN6_LOGICOPFUNCTION_COPY		12
+#define GEN6_LOGICOPFUNCTION_OR_REVERSE		13
+#define GEN6_LOGICOPFUNCTION_OR			14
+#define GEN6_LOGICOPFUNCTION_SET		15
 
-#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
-#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     1
+#define GEN6_MAPFILTER_NEAREST			0x0
+#define GEN6_MAPFILTER_LINEAR			0x1
+#define GEN6_MAPFILTER_ANISOTROPIC		0x2
 
-#define GEN6_ANISORATIO_2     0 
-#define GEN6_ANISORATIO_4     1 
-#define GEN6_ANISORATIO_6     2 
-#define GEN6_ANISORATIO_8     3 
-#define GEN6_ANISORATIO_10    4 
-#define GEN6_ANISORATIO_12    5 
-#define GEN6_ANISORATIO_14    6 
-#define GEN6_ANISORATIO_16    7
+#define GEN6_MIPFILTER_NONE			0
+#define GEN6_MIPFILTER_NEAREST			1
+#define GEN6_MIPFILTER_LINEAR			3
 
-#define GEN6_BLENDFACTOR_ONE                 0x1
-#define GEN6_BLENDFACTOR_SRC_COLOR           0x2
-#define GEN6_BLENDFACTOR_SRC_ALPHA           0x3
-#define GEN6_BLENDFACTOR_DST_ALPHA           0x4
-#define GEN6_BLENDFACTOR_DST_COLOR           0x5
-#define GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE  0x6
-#define GEN6_BLENDFACTOR_CONST_COLOR         0x7
-#define GEN6_BLENDFACTOR_CONST_ALPHA         0x8
-#define GEN6_BLENDFACTOR_SRC1_COLOR          0x9
-#define GEN6_BLENDFACTOR_SRC1_ALPHA          0x0A
-#define GEN6_BLENDFACTOR_ZERO                0x11
-#define GEN6_BLENDFACTOR_INV_SRC_COLOR       0x12
-#define GEN6_BLENDFACTOR_INV_SRC_ALPHA       0x13
-#define GEN6_BLENDFACTOR_INV_DST_ALPHA       0x14
-#define GEN6_BLENDFACTOR_INV_DST_COLOR       0x15
-#define GEN6_BLENDFACTOR_INV_CONST_COLOR     0x17
-#define GEN6_BLENDFACTOR_INV_CONST_ALPHA     0x18
-#define GEN6_BLENDFACTOR_INV_SRC1_COLOR      0x19
-#define GEN6_BLENDFACTOR_INV_SRC1_ALPHA      0x1A
+#define GEN6_POLYGON_FRONT_FACING		0
+#define GEN6_POLYGON_BACK_FACING		1
 
-#define GEN6_BLENDFUNCTION_ADD               0
-#define GEN6_BLENDFUNCTION_SUBTRACT          1
-#define GEN6_BLENDFUNCTION_REVERSE_SUBTRACT  2
-#define GEN6_BLENDFUNCTION_MIN               3
-#define GEN6_BLENDFUNCTION_MAX               4
+#define GEN6_PREFILTER_ALWAYS			0x0
+#define GEN6_PREFILTER_NEVER			0x1
+#define GEN6_PREFILTER_LESS			0x2
+#define GEN6_PREFILTER_EQUAL			0x3
+#define GEN6_PREFILTER_LEQUAL			0x4
+#define GEN6_PREFILTER_GREATER			0x5
+#define GEN6_PREFILTER_NOTEQUAL			0x6
+#define GEN6_PREFILTER_GEQUAL			0x7
 
-#define GEN6_ALPHATEST_FORMAT_UNORM8         0
-#define GEN6_ALPHATEST_FORMAT_FLOAT32        1
+#define GEN6_PROVOKING_VERTEX_0			0
+#define GEN6_PROVOKING_VERTEX_1			1
+#define GEN6_PROVOKING_VERTEX_2			2
 
-#define GEN6_CHROMAKEY_KILL_ON_ANY_MATCH  0
-#define GEN6_CHROMAKEY_REPLACE_BLACK      1
+#define GEN6_RASTRULE_UPPER_LEFT		0
+#define GEN6_RASTRULE_UPPER_RIGHT		1
 
-#define GEN6_CLIP_API_OGL     0
-#define GEN6_CLIP_API_DX      1
+#define GEN6_RENDERTARGET_CLAMPRANGE_UNORM	0
+#define GEN6_RENDERTARGET_CLAMPRANGE_SNORM	1
+#define GEN6_RENDERTARGET_CLAMPRANGE_FORMAT	2
 
-#define GEN6_CLIPMODE_NORMAL              0
-#define GEN6_CLIPMODE_CLIP_ALL            1
-#define GEN6_CLIPMODE_CLIP_NON_REJECTED   2
-#define GEN6_CLIPMODE_REJECT_ALL          3
-#define GEN6_CLIPMODE_ACCEPT_ALL          4
+#define GEN6_STENCILOP_KEEP			0
+#define GEN6_STENCILOP_ZERO			1
+#define GEN6_STENCILOP_REPLACE			2
+#define GEN6_STENCILOP_INCRSAT			3
+#define GEN6_STENCILOP_DECRSAT			4
+#define GEN6_STENCILOP_INCR			5
+#define GEN6_STENCILOP_DECR			6
+#define GEN6_STENCILOP_INVERT			7
 
-#define GEN6_CLIP_NDCSPACE     0
-#define GEN6_CLIP_SCREENSPACE  1
-
-#define GEN6_COMPAREFUNCTION_ALWAYS       0
-#define GEN6_COMPAREFUNCTION_NEVER        1
-#define GEN6_COMPAREFUNCTION_LESS         2
-#define GEN6_COMPAREFUNCTION_EQUAL        3
-#define GEN6_COMPAREFUNCTION_LEQUAL       4
-#define GEN6_COMPAREFUNCTION_GREATER      5
-#define GEN6_COMPAREFUNCTION_NOTEQUAL     6
-#define GEN6_COMPAREFUNCTION_GEQUAL       7
-
-#define GEN6_COVERAGE_PIXELS_HALF     0
-#define GEN6_COVERAGE_PIXELS_1        1
-#define GEN6_COVERAGE_PIXELS_2        2
-#define GEN6_COVERAGE_PIXELS_4        3
-
-#define GEN6_CULLMODE_BOTH        0
-#define GEN6_CULLMODE_NONE        1
-#define GEN6_CULLMODE_FRONT       2
-#define GEN6_CULLMODE_BACK        3
-
-#define GEN6_DEFAULTCOLOR_R8G8B8A8_UNORM      0
-#define GEN6_DEFAULTCOLOR_R32G32B32A32_FLOAT  1
-
-#define GEN6_DEPTHFORMAT_D32_FLOAT_S8X24_UINT     0
-#define GEN6_DEPTHFORMAT_D32_FLOAT                1
-#define GEN6_DEPTHFORMAT_D24_UNORM_S8_UINT        2
-#define GEN6_DEPTHFORMAT_D16_UNORM                5
-
-#define GEN6_FLOATING_POINT_IEEE_754        0
-#define GEN6_FLOATING_POINT_NON_IEEE_754    1
-
-#define GEN6_FRONTWINDING_CW      0
-#define GEN6_FRONTWINDING_CCW     1
-
-#define GEN6_INDEX_BYTE     0
-#define GEN6_INDEX_WORD     1
-#define GEN6_INDEX_DWORD    2
-
-#define GEN6_LOGICOPFUNCTION_CLEAR            0
-#define GEN6_LOGICOPFUNCTION_NOR              1
-#define GEN6_LOGICOPFUNCTION_AND_INVERTED     2
-#define GEN6_LOGICOPFUNCTION_COPY_INVERTED    3
-#define GEN6_LOGICOPFUNCTION_AND_REVERSE      4
-#define GEN6_LOGICOPFUNCTION_INVERT           5
-#define GEN6_LOGICOPFUNCTION_XOR              6
-#define GEN6_LOGICOPFUNCTION_NAND             7
-#define GEN6_LOGICOPFUNCTION_AND              8
-#define GEN6_LOGICOPFUNCTION_EQUIV            9
-#define GEN6_LOGICOPFUNCTION_NOOP             10
-#define GEN6_LOGICOPFUNCTION_OR_INVERTED      11
-#define GEN6_LOGICOPFUNCTION_COPY             12
-#define GEN6_LOGICOPFUNCTION_OR_REVERSE       13
-#define GEN6_LOGICOPFUNCTION_OR               14
-#define GEN6_LOGICOPFUNCTION_SET              15  
-
-#define GEN6_MAPFILTER_NEAREST        0x0 
-#define GEN6_MAPFILTER_LINEAR         0x1 
-#define GEN6_MAPFILTER_ANISOTROPIC    0x2
-
-#define GEN6_MIPFILTER_NONE        0   
-#define GEN6_MIPFILTER_NEAREST     1   
-#define GEN6_MIPFILTER_LINEAR      3
-
-#define GEN6_POLYGON_FRONT_FACING     0
-#define GEN6_POLYGON_BACK_FACING      1
-
-#define GEN6_PREFILTER_ALWAYS     0x0 
-#define GEN6_PREFILTER_NEVER      0x1
-#define GEN6_PREFILTER_LESS       0x2
-#define GEN6_PREFILTER_EQUAL      0x3
-#define GEN6_PREFILTER_LEQUAL     0x4
-#define GEN6_PREFILTER_GREATER    0x5
-#define GEN6_PREFILTER_NOTEQUAL   0x6
-#define GEN6_PREFILTER_GEQUAL     0x7
-
-#define GEN6_PROVOKING_VERTEX_0    0
-#define GEN6_PROVOKING_VERTEX_1    1 
-#define GEN6_PROVOKING_VERTEX_2    2
-
-#define GEN6_RASTRULE_UPPER_LEFT  0    
-#define GEN6_RASTRULE_UPPER_RIGHT 1
-
-#define GEN6_RENDERTARGET_CLAMPRANGE_UNORM    0
-#define GEN6_RENDERTARGET_CLAMPRANGE_SNORM    1
-#define GEN6_RENDERTARGET_CLAMPRANGE_FORMAT   2
-
-#define GEN6_STENCILOP_KEEP               0
-#define GEN6_STENCILOP_ZERO               1
-#define GEN6_STENCILOP_REPLACE            2
-#define GEN6_STENCILOP_INCRSAT            3
-#define GEN6_STENCILOP_DECRSAT            4
-#define GEN6_STENCILOP_INCR               5
-#define GEN6_STENCILOP_DECR               6
-#define GEN6_STENCILOP_INVERT             7
-
-#define GEN6_SURFACE_MIPMAPLAYOUT_BELOW   0
-#define GEN6_SURFACE_MIPMAPLAYOUT_RIGHT   1
+#define GEN6_SURFACE_MIPMAPLAYOUT_BELOW		0
+#define GEN6_SURFACE_MIPMAPLAYOUT_RIGHT		1
 
 #define GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT             0x000
 #define GEN6_SURFACEFORMAT_R32G32B32A32_SINT              0x001
@@ -926,258 +664,256 @@
 #define GEN6_SURFACEFORMAT_R16G16B16_SSCALED              0x19E
 #define GEN6_SURFACEFORMAT_R16G16B16_USCALED              0x19F
 
-#define GEN6_SURFACERETURNFORMAT_FLOAT32  0
-#define GEN6_SURFACERETURNFORMAT_S1       1
+#define GEN6_SURFACERETURNFORMAT_FLOAT32	0
+#define GEN6_SURFACERETURNFORMAT_S1		1
 
-#define GEN6_SURFACE_1D      0
-#define GEN6_SURFACE_2D      1
-#define GEN6_SURFACE_3D      2
-#define GEN6_SURFACE_CUBE    3
-#define GEN6_SURFACE_BUFFER  4
-#define GEN6_SURFACE_NULL    7
+#define GEN6_SURFACE_1D				0
+#define GEN6_SURFACE_2D				1
+#define GEN6_SURFACE_3D				2
+#define GEN6_SURFACE_CUBE			3
+#define GEN6_SURFACE_BUFFER			4
+#define GEN6_SURFACE_NULL			7
 
-#define GEN6_BORDER_COLOR_MODE_DEFAULT	0
-#define GEN6_BORDER_COLOR_MODE_LEGACY	1
+#define GEN6_BORDER_COLOR_MODE_DEFAULT		0
+#define GEN6_BORDER_COLOR_MODE_LEGACY		1
 
-#define GEN6_TEXCOORDMODE_WRAP            0
-#define GEN6_TEXCOORDMODE_MIRROR          1
-#define GEN6_TEXCOORDMODE_CLAMP           2
-#define GEN6_TEXCOORDMODE_CUBE            3
-#define GEN6_TEXCOORDMODE_CLAMP_BORDER    4
-#define GEN6_TEXCOORDMODE_MIRROR_ONCE     5
+#define GEN6_TEXCOORDMODE_WRAP			0
+#define GEN6_TEXCOORDMODE_MIRROR		1
+#define GEN6_TEXCOORDMODE_CLAMP			2
+#define GEN6_TEXCOORDMODE_CUBE			3
+#define GEN6_TEXCOORDMODE_CLAMP_BORDER		4
+#define GEN6_TEXCOORDMODE_MIRROR_ONCE		5
 
-#define GEN6_THREAD_PRIORITY_NORMAL   0
-#define GEN6_THREAD_PRIORITY_HIGH     1
+#define GEN6_THREAD_PRIORITY_NORMAL		0
+#define GEN6_THREAD_PRIORITY_HIGH		1
 
-#define GEN6_TILEWALK_XMAJOR                 0
-#define GEN6_TILEWALK_YMAJOR                 1
+#define GEN6_TILEWALK_XMAJOR			0
+#define GEN6_TILEWALK_YMAJOR			1
 
-#define GEN6_VERTEX_SUBPIXEL_PRECISION_8BITS  0
-#define GEN6_VERTEX_SUBPIXEL_PRECISION_4BITS  1
+#define GEN6_VERTEX_SUBPIXEL_PRECISION_8BITS	0
+#define GEN6_VERTEX_SUBPIXEL_PRECISION_4BITS	1
 
 #define GEN6_VERTEXBUFFER_ACCESS_VERTEXDATA     0
 #define GEN6_VERTEXBUFFER_ACCESS_INSTANCEDATA   1
 
-#define GEN6_VFCOMPONENT_NOSTORE      0
-#define GEN6_VFCOMPONENT_STORE_SRC    1
-#define GEN6_VFCOMPONENT_STORE_0      2
-#define GEN6_VFCOMPONENT_STORE_1_FLT  3
-#define GEN6_VFCOMPONENT_STORE_1_INT  4
-#define GEN6_VFCOMPONENT_STORE_VID    5
-#define GEN6_VFCOMPONENT_STORE_IID    6
-#define GEN6_VFCOMPONENT_STORE_PID    7
-
-
+#define GEN6_VFCOMPONENT_NOSTORE		0
+#define GEN6_VFCOMPONENT_STORE_SRC		1
+#define GEN6_VFCOMPONENT_STORE_0		2
+#define GEN6_VFCOMPONENT_STORE_1_FLT		3
+#define GEN6_VFCOMPONENT_STORE_1_INT		4
+#define GEN6_VFCOMPONENT_STORE_VID		5
+#define GEN6_VFCOMPONENT_STORE_IID		6
+#define GEN6_VFCOMPONENT_STORE_PID		7
 
 /* Execution Unit (EU) defines
  */
 
-#define GEN6_ALIGN_1   0
-#define GEN6_ALIGN_16  1
+#define GEN6_ALIGN_1				0
+#define GEN6_ALIGN_16				1
 
-#define GEN6_ADDRESS_DIRECT                        0
-#define GEN6_ADDRESS_REGISTER_INDIRECT_REGISTER    1
+#define GEN6_ADDRESS_DIRECT			0
+#define GEN6_ADDRESS_REGISTER_INDIRECT_REGISTER	1
 
-#define GEN6_CHANNEL_X     0
-#define GEN6_CHANNEL_Y     1
-#define GEN6_CHANNEL_Z     2
-#define GEN6_CHANNEL_W     3
+#define GEN6_CHANNEL_X				0
+#define GEN6_CHANNEL_Y				1
+#define GEN6_CHANNEL_Z				2
+#define GEN6_CHANNEL_W				3
 
-#define GEN6_COMPRESSION_NONE          0
-#define GEN6_COMPRESSION_2NDHALF       1
-#define GEN6_COMPRESSION_COMPRESSED    2
+#define GEN6_COMPRESSION_NONE			0
+#define GEN6_COMPRESSION_2NDHALF		1
+#define GEN6_COMPRESSION_COMPRESSED		2
 
-#define GEN6_CONDITIONAL_NONE  0
-#define GEN6_CONDITIONAL_Z     1
-#define GEN6_CONDITIONAL_NZ    2
-#define GEN6_CONDITIONAL_EQ    1	/* Z */
-#define GEN6_CONDITIONAL_NEQ   2	/* NZ */
-#define GEN6_CONDITIONAL_G     3
-#define GEN6_CONDITIONAL_GE    4
-#define GEN6_CONDITIONAL_L     5
-#define GEN6_CONDITIONAL_LE    6
-#define GEN6_CONDITIONAL_C     7
-#define GEN6_CONDITIONAL_O     8
+#define GEN6_CONDITIONAL_NONE			0
+#define GEN6_CONDITIONAL_Z			1
+#define GEN6_CONDITIONAL_NZ			2
+#define GEN6_CONDITIONAL_EQ			1	/* Z */
+#define GEN6_CONDITIONAL_NEQ			2	/* NZ */
+#define GEN6_CONDITIONAL_G			3
+#define GEN6_CONDITIONAL_GE			4
+#define GEN6_CONDITIONAL_L			5
+#define GEN6_CONDITIONAL_LE			6
+#define GEN6_CONDITIONAL_C			7
+#define GEN6_CONDITIONAL_O			8
 
-#define GEN6_DEBUG_NONE        0
-#define GEN6_DEBUG_BREAKPOINT  1
+#define GEN6_DEBUG_NONE				0
+#define GEN6_DEBUG_BREAKPOINT			1
 
-#define GEN6_DEPENDENCY_NORMAL         0
-#define GEN6_DEPENDENCY_NOTCLEARED     1
-#define GEN6_DEPENDENCY_NOTCHECKED     2
-#define GEN6_DEPENDENCY_DISABLE        3
+#define GEN6_DEPENDENCY_NORMAL			0
+#define GEN6_DEPENDENCY_NOTCLEARED		1
+#define GEN6_DEPENDENCY_NOTCHECKED		2
+#define GEN6_DEPENDENCY_DISABLE			3
 
-#define GEN6_EXECUTE_1     0
-#define GEN6_EXECUTE_2     1
-#define GEN6_EXECUTE_4     2
-#define GEN6_EXECUTE_8     3
-#define GEN6_EXECUTE_16    4
-#define GEN6_EXECUTE_32    5
+#define GEN6_EXECUTE_1				0
+#define GEN6_EXECUTE_2				1
+#define GEN6_EXECUTE_4				2
+#define GEN6_EXECUTE_8				3
+#define GEN6_EXECUTE_16				4
+#define GEN6_EXECUTE_32				5
 
-#define GEN6_HORIZONTAL_STRIDE_0   0
-#define GEN6_HORIZONTAL_STRIDE_1   1
-#define GEN6_HORIZONTAL_STRIDE_2   2
-#define GEN6_HORIZONTAL_STRIDE_4   3
+#define GEN6_HORIZONTAL_STRIDE_0		0
+#define GEN6_HORIZONTAL_STRIDE_1		1
+#define GEN6_HORIZONTAL_STRIDE_2		2
+#define GEN6_HORIZONTAL_STRIDE_4		3
 
-#define GEN6_INSTRUCTION_NORMAL    0
-#define GEN6_INSTRUCTION_SATURATE  1
+#define GEN6_INSTRUCTION_NORMAL			0
+#define GEN6_INSTRUCTION_SATURATE		1
 
-#define GEN6_MASK_ENABLE   0
-#define GEN6_MASK_DISABLE  1
+#define GEN6_MASK_ENABLE			0
+#define GEN6_MASK_DISABLE			1
 
-#define GEN6_OPCODE_MOV        1
-#define GEN6_OPCODE_SEL        2
-#define GEN6_OPCODE_NOT        4
-#define GEN6_OPCODE_AND        5
-#define GEN6_OPCODE_OR         6
-#define GEN6_OPCODE_XOR        7
-#define GEN6_OPCODE_SHR        8
-#define GEN6_OPCODE_SHL        9
-#define GEN6_OPCODE_RSR        10
-#define GEN6_OPCODE_RSL        11
-#define GEN6_OPCODE_ASR        12
-#define GEN6_OPCODE_CMP        16
-#define GEN6_OPCODE_JMPI       32
-#define GEN6_OPCODE_IF         34
-#define GEN6_OPCODE_IFF        35
-#define GEN6_OPCODE_ELSE       36
-#define GEN6_OPCODE_ENDIF      37
-#define GEN6_OPCODE_DO         38
-#define GEN6_OPCODE_WHILE      39
-#define GEN6_OPCODE_BREAK      40
-#define GEN6_OPCODE_CONTINUE   41
-#define GEN6_OPCODE_HALT       42
-#define GEN6_OPCODE_MSAVE      44
-#define GEN6_OPCODE_MRESTORE   45
-#define GEN6_OPCODE_PUSH       46
-#define GEN6_OPCODE_POP        47
-#define GEN6_OPCODE_WAIT       48
-#define GEN6_OPCODE_SEND       49
-#define GEN6_OPCODE_ADD        64
-#define GEN6_OPCODE_MUL        65
-#define GEN6_OPCODE_AVG        66
-#define GEN6_OPCODE_FRC        67
-#define GEN6_OPCODE_RNDU       68
-#define GEN6_OPCODE_RNDD       69
-#define GEN6_OPCODE_RNDE       70
-#define GEN6_OPCODE_RNDZ       71
-#define GEN6_OPCODE_MAC        72
-#define GEN6_OPCODE_MACH       73
-#define GEN6_OPCODE_LZD        74
-#define GEN6_OPCODE_SAD2       80
-#define GEN6_OPCODE_SADA2      81
-#define GEN6_OPCODE_DP4        84
-#define GEN6_OPCODE_DPH        85
-#define GEN6_OPCODE_DP3        86
-#define GEN6_OPCODE_DP2        87
-#define GEN6_OPCODE_DPA2       88
-#define GEN6_OPCODE_LINE       89
-#define GEN6_OPCODE_NOP        126
+#define GEN6_OPCODE_MOV				1
+#define GEN6_OPCODE_SEL				2
+#define GEN6_OPCODE_NOT				4
+#define GEN6_OPCODE_AND				5
+#define GEN6_OPCODE_OR				6
+#define GEN6_OPCODE_XOR				7
+#define GEN6_OPCODE_SHR				8
+#define GEN6_OPCODE_SHL				9
+#define GEN6_OPCODE_RSR				10
+#define GEN6_OPCODE_RSL				11
+#define GEN6_OPCODE_ASR				12
+#define GEN6_OPCODE_CMP				16
+#define GEN6_OPCODE_JMPI			32
+#define GEN6_OPCODE_IF				34
+#define GEN6_OPCODE_IFF				35
+#define GEN6_OPCODE_ELSE			36
+#define GEN6_OPCODE_ENDIF			37
+#define GEN6_OPCODE_DO				38
+#define GEN6_OPCODE_WHILE			39
+#define GEN6_OPCODE_BREAK			40
+#define GEN6_OPCODE_CONTINUE			41
+#define GEN6_OPCODE_HALT			42
+#define GEN6_OPCODE_MSAVE			44
+#define GEN6_OPCODE_MRESTORE			45
+#define GEN6_OPCODE_PUSH			46
+#define GEN6_OPCODE_POP				47
+#define GEN6_OPCODE_WAIT			48
+#define GEN6_OPCODE_SEND			49
+#define GEN6_OPCODE_ADD				64
+#define GEN6_OPCODE_MUL				65
+#define GEN6_OPCODE_AVG				66
+#define GEN6_OPCODE_FRC				67
+#define GEN6_OPCODE_RNDU			68
+#define GEN6_OPCODE_RNDD			69
+#define GEN6_OPCODE_RNDE			70
+#define GEN6_OPCODE_RNDZ			71
+#define GEN6_OPCODE_MAC				72
+#define GEN6_OPCODE_MACH			73
+#define GEN6_OPCODE_LZD				74
+#define GEN6_OPCODE_SAD2			80
+#define GEN6_OPCODE_SADA2			81
+#define GEN6_OPCODE_DP4				84
+#define GEN6_OPCODE_DPH				85
+#define GEN6_OPCODE_DP3				86
+#define GEN6_OPCODE_DP2				87
+#define GEN6_OPCODE_DPA2			88
+#define GEN6_OPCODE_LINE			89
+#define GEN6_OPCODE_NOP				126
 
-#define GEN6_PREDICATE_NONE             0
-#define GEN6_PREDICATE_NORMAL           1
-#define GEN6_PREDICATE_ALIGN1_ANYV             2
-#define GEN6_PREDICATE_ALIGN1_ALLV             3
-#define GEN6_PREDICATE_ALIGN1_ANY2H            4
-#define GEN6_PREDICATE_ALIGN1_ALL2H            5
-#define GEN6_PREDICATE_ALIGN1_ANY4H            6
-#define GEN6_PREDICATE_ALIGN1_ALL4H            7
-#define GEN6_PREDICATE_ALIGN1_ANY8H            8
-#define GEN6_PREDICATE_ALIGN1_ALL8H            9
-#define GEN6_PREDICATE_ALIGN1_ANY16H           10
-#define GEN6_PREDICATE_ALIGN1_ALL16H           11
-#define GEN6_PREDICATE_ALIGN16_REPLICATE_X     2
-#define GEN6_PREDICATE_ALIGN16_REPLICATE_Y     3
-#define GEN6_PREDICATE_ALIGN16_REPLICATE_Z     4
-#define GEN6_PREDICATE_ALIGN16_REPLICATE_W     5
-#define GEN6_PREDICATE_ALIGN16_ANY4H           6
-#define GEN6_PREDICATE_ALIGN16_ALL4H           7
+#define GEN6_PREDICATE_NONE			0
+#define GEN6_PREDICATE_NORMAL			1
+#define GEN6_PREDICATE_ALIGN1_ANYV		2
+#define GEN6_PREDICATE_ALIGN1_ALLV		3
+#define GEN6_PREDICATE_ALIGN1_ANY2H		4
+#define GEN6_PREDICATE_ALIGN1_ALL2H		5
+#define GEN6_PREDICATE_ALIGN1_ANY4H		6
+#define GEN6_PREDICATE_ALIGN1_ALL4H		7
+#define GEN6_PREDICATE_ALIGN1_ANY8H		8
+#define GEN6_PREDICATE_ALIGN1_ALL8H		9
+#define GEN6_PREDICATE_ALIGN1_ANY16H		10
+#define GEN6_PREDICATE_ALIGN1_ALL16H		11
+#define GEN6_PREDICATE_ALIGN16_REPLICATE_X	2
+#define GEN6_PREDICATE_ALIGN16_REPLICATE_Y	3
+#define GEN6_PREDICATE_ALIGN16_REPLICATE_Z	4
+#define GEN6_PREDICATE_ALIGN16_REPLICATE_W	5
+#define GEN6_PREDICATE_ALIGN16_ANY4H		6
+#define GEN6_PREDICATE_ALIGN16_ALL4H		7
 
-#define GEN6_ARCHITECTURE_REGISTER_FILE    0
-#define GEN6_GENERAL_REGISTER_FILE         1
-#define GEN6_MESSAGE_REGISTER_FILE         2
-#define GEN6_IMMEDIATE_VALUE               3
+#define GEN6_ARCHITECTURE_REGISTER_FILE		0
+#define GEN6_GENERAL_REGISTER_FILE		1
+#define GEN6_MESSAGE_REGISTER_FILE		2
+#define GEN6_IMMEDIATE_VALUE			3
 
-#define GEN6_REGISTER_TYPE_UD  0
-#define GEN6_REGISTER_TYPE_D   1
-#define GEN6_REGISTER_TYPE_UW  2
-#define GEN6_REGISTER_TYPE_W   3
-#define GEN6_REGISTER_TYPE_UB  4
-#define GEN6_REGISTER_TYPE_B   5
-#define GEN6_REGISTER_TYPE_VF  5	/* packed float vector, immediates only? */
-#define GEN6_REGISTER_TYPE_HF  6
-#define GEN6_REGISTER_TYPE_V   6	/* packed int vector, immediates only, uword dest only */
-#define GEN6_REGISTER_TYPE_F   7
+#define GEN6_REGISTER_TYPE_UD			0
+#define GEN6_REGISTER_TYPE_D			1
+#define GEN6_REGISTER_TYPE_UW			2
+#define GEN6_REGISTER_TYPE_W			3
+#define GEN6_REGISTER_TYPE_UB			4
+#define GEN6_REGISTER_TYPE_B			5
+/* packed float vector, immediates only (reuses the encoding of B) */
+#define GEN6_REGISTER_TYPE_VF			5
+#define GEN6_REGISTER_TYPE_HF			6
+/* packed int vector, immediates only, uword dest only */
+#define GEN6_REGISTER_TYPE_V			6
+#define GEN6_REGISTER_TYPE_F			7
 
-#define GEN6_ARF_NULL                  0x00
-#define GEN6_ARF_ADDRESS               0x10
-#define GEN6_ARF_ACCUMULATOR           0x20   
-#define GEN6_ARF_FLAG                  0x30
-#define GEN6_ARF_MASK                  0x40
-#define GEN6_ARF_MASK_STACK            0x50
-#define GEN6_ARF_MASK_STACK_DEPTH      0x60
-#define GEN6_ARF_STATE                 0x70
-#define GEN6_ARF_CONTROL               0x80
-#define GEN6_ARF_NOTIFICATION_COUNT    0x90
-#define GEN6_ARF_IP                    0xA0
+#define GEN6_ARF_NULL				0x00
+#define GEN6_ARF_ADDRESS			0x10
+#define GEN6_ARF_ACCUMULATOR			0x20
+#define GEN6_ARF_FLAG				0x30
+#define GEN6_ARF_MASK				0x40
+#define GEN6_ARF_MASK_STACK			0x50
+#define GEN6_ARF_MASK_STACK_DEPTH		0x60
+#define GEN6_ARF_STATE				0x70
+#define GEN6_ARF_CONTROL			0x80
+#define GEN6_ARF_NOTIFICATION_COUNT		0x90
+#define GEN6_ARF_IP				0xA0
 
-#define GEN6_AMASK   0
-#define GEN6_IMASK   1
-#define GEN6_LMASK   2
-#define GEN6_CMASK   3
+#define GEN6_AMASK				0
+#define GEN6_IMASK				1
+#define GEN6_LMASK				2
+#define GEN6_CMASK				3
 
+#define GEN6_THREAD_NORMAL			0
+#define GEN6_THREAD_ATOMIC			1
+#define GEN6_THREAD_SWITCH			2
 
+#define GEN6_VERTICAL_STRIDE_0			0
+#define GEN6_VERTICAL_STRIDE_1			1
+#define GEN6_VERTICAL_STRIDE_2			2
+#define GEN6_VERTICAL_STRIDE_4			3
+#define GEN6_VERTICAL_STRIDE_8			4
+#define GEN6_VERTICAL_STRIDE_16			5
+#define GEN6_VERTICAL_STRIDE_32			6
+#define GEN6_VERTICAL_STRIDE_64			7
+#define GEN6_VERTICAL_STRIDE_128		8
+#define GEN6_VERTICAL_STRIDE_256		9
+#define GEN6_VERTICAL_STRIDE_ONE_DIMENSIONAL	0xF
 
-#define GEN6_THREAD_NORMAL     0
-#define GEN6_THREAD_ATOMIC     1
-#define GEN6_THREAD_SWITCH     2
+#define GEN6_WIDTH_1				0
+#define GEN6_WIDTH_2				1
+#define GEN6_WIDTH_4				2
+#define GEN6_WIDTH_8				3
+#define GEN6_WIDTH_16				4
 
-#define GEN6_VERTICAL_STRIDE_0                 0
-#define GEN6_VERTICAL_STRIDE_1                 1
-#define GEN6_VERTICAL_STRIDE_2                 2
-#define GEN6_VERTICAL_STRIDE_4                 3
-#define GEN6_VERTICAL_STRIDE_8                 4
-#define GEN6_VERTICAL_STRIDE_16                5
-#define GEN6_VERTICAL_STRIDE_32                6
-#define GEN6_VERTICAL_STRIDE_64                7
-#define GEN6_VERTICAL_STRIDE_128               8
-#define GEN6_VERTICAL_STRIDE_256               9
-#define GEN6_VERTICAL_STRIDE_ONE_DIMENSIONAL   0xF
+#define GEN6_STATELESS_BUFFER_BOUNDARY_1K	0
+#define GEN6_STATELESS_BUFFER_BOUNDARY_2K	1
+#define GEN6_STATELESS_BUFFER_BOUNDARY_4K	2
+#define GEN6_STATELESS_BUFFER_BOUNDARY_8K	3
+#define GEN6_STATELESS_BUFFER_BOUNDARY_16K	4
+#define GEN6_STATELESS_BUFFER_BOUNDARY_32K	5
+#define GEN6_STATELESS_BUFFER_BOUNDARY_64K	6
+#define GEN6_STATELESS_BUFFER_BOUNDARY_128K	7
+#define GEN6_STATELESS_BUFFER_BOUNDARY_256K	8
+#define GEN6_STATELESS_BUFFER_BOUNDARY_512K	9
+#define GEN6_STATELESS_BUFFER_BOUNDARY_1M	10
+#define GEN6_STATELESS_BUFFER_BOUNDARY_2M	11
 
-#define GEN6_WIDTH_1       0
-#define GEN6_WIDTH_2       1
-#define GEN6_WIDTH_4       2
-#define GEN6_WIDTH_8       3
-#define GEN6_WIDTH_16      4
+#define GEN6_POLYGON_FACING_FRONT		0
+#define GEN6_POLYGON_FACING_BACK		1
 
-#define GEN6_STATELESS_BUFFER_BOUNDARY_1K      0
-#define GEN6_STATELESS_BUFFER_BOUNDARY_2K      1
-#define GEN6_STATELESS_BUFFER_BOUNDARY_4K      2
-#define GEN6_STATELESS_BUFFER_BOUNDARY_8K      3
-#define GEN6_STATELESS_BUFFER_BOUNDARY_16K     4
-#define GEN6_STATELESS_BUFFER_BOUNDARY_32K     5
-#define GEN6_STATELESS_BUFFER_BOUNDARY_64K     6
-#define GEN6_STATELESS_BUFFER_BOUNDARY_128K    7
-#define GEN6_STATELESS_BUFFER_BOUNDARY_256K    8
-#define GEN6_STATELESS_BUFFER_BOUNDARY_512K    9
-#define GEN6_STATELESS_BUFFER_BOUNDARY_1M      10
-#define GEN6_STATELESS_BUFFER_BOUNDARY_2M      11
+#define GEN6_MESSAGE_TARGET_NULL		0
+#define GEN6_MESSAGE_TARGET_MATH		1
+#define GEN6_MESSAGE_TARGET_SAMPLER		2
+#define GEN6_MESSAGE_TARGET_GATEWAY		3
+#define GEN6_MESSAGE_TARGET_DATAPORT_READ	4
+#define GEN6_MESSAGE_TARGET_DATAPORT_WRITE	5
+#define GEN6_MESSAGE_TARGET_URB			6
+#define GEN6_MESSAGE_TARGET_THREAD_SPAWNER	7
 
-#define GEN6_POLYGON_FACING_FRONT      0
-#define GEN6_POLYGON_FACING_BACK       1
-
-#define GEN6_MESSAGE_TARGET_NULL               0
-#define GEN6_MESSAGE_TARGET_MATH               1
-#define GEN6_MESSAGE_TARGET_SAMPLER            2
-#define GEN6_MESSAGE_TARGET_GATEWAY            3
-#define GEN6_MESSAGE_TARGET_DATAPORT_READ      4
-#define GEN6_MESSAGE_TARGET_DATAPORT_WRITE     5
-#define GEN6_MESSAGE_TARGET_URB                6
-#define GEN6_MESSAGE_TARGET_THREAD_SPAWNER     7
-
-#define GEN6_SAMPLER_RETURN_FORMAT_FLOAT32     0
-#define GEN6_SAMPLER_RETURN_FORMAT_UINT32      2
-#define GEN6_SAMPLER_RETURN_FORMAT_SINT32      3
+#define GEN6_SAMPLER_RETURN_FORMAT_FLOAT32	0
+#define GEN6_SAMPLER_RETURN_FORMAT_UINT32	2
+#define GEN6_SAMPLER_RETURN_FORMAT_SINT32	3
 
 #define GEN6_SAMPLER_MESSAGE_SIMD8_SAMPLE              0
 #define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE             0
@@ -1196,26 +932,26 @@
 #define GEN6_SAMPLER_MESSAGE_SIMD8_LD                  3
 #define GEN6_SAMPLER_MESSAGE_SIMD16_LD                 3
 
-#define GEN6_DATAPORT_OWORD_BLOCK_1_OWORDLOW   0
-#define GEN6_DATAPORT_OWORD_BLOCK_1_OWORDHIGH  1
-#define GEN6_DATAPORT_OWORD_BLOCK_2_OWORDS     2
-#define GEN6_DATAPORT_OWORD_BLOCK_4_OWORDS     3
-#define GEN6_DATAPORT_OWORD_BLOCK_8_OWORDS     4
+#define GEN6_DATAPORT_OWORD_BLOCK_1_OWORDLOW	0
+#define GEN6_DATAPORT_OWORD_BLOCK_1_OWORDHIGH	1
+#define GEN6_DATAPORT_OWORD_BLOCK_2_OWORDS	2
+#define GEN6_DATAPORT_OWORD_BLOCK_4_OWORDS	3
+#define GEN6_DATAPORT_OWORD_BLOCK_8_OWORDS	4
 
-#define GEN6_DATAPORT_OWORD_DUAL_BLOCK_1OWORD     0
-#define GEN6_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS    2
+#define GEN6_DATAPORT_OWORD_DUAL_BLOCK_1OWORD	0
+#define GEN6_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS	2
 
-#define GEN6_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS   2
-#define GEN6_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS  3
+#define GEN6_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS	2
+#define GEN6_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS	3
 
-#define GEN6_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ          0
-#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     1
-#define GEN6_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ          2
-#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      3
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ		0
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ	1
+#define GEN6_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ		2
+#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ		3
 
-#define GEN6_DATAPORT_READ_TARGET_DATA_CACHE      0
-#define GEN6_DATAPORT_READ_TARGET_RENDER_CACHE    1
-#define GEN6_DATAPORT_READ_TARGET_SAMPLER_CACHE   2
+#define GEN6_DATAPORT_READ_TARGET_DATA_CACHE	0
+#define GEN6_DATAPORT_READ_TARGET_RENDER_CACHE	1
+#define GEN6_DATAPORT_READ_TARGET_SAMPLER_CACHE	2
 
 #define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE                0
 #define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED     1
@@ -1245,36 +981,36 @@
 #define GEN6_MATH_FUNCTION_INT_DIV_QUOTIENT                 12
 #define GEN6_MATH_FUNCTION_INT_DIV_REMAINDER                13
 
-#define GEN6_MATH_INTEGER_UNSIGNED     0
-#define GEN6_MATH_INTEGER_SIGNED       1
+#define GEN6_MATH_INTEGER_UNSIGNED		0
+#define GEN6_MATH_INTEGER_SIGNED		1
 
-#define GEN6_MATH_PRECISION_FULL        0
-#define GEN6_MATH_PRECISION_PARTIAL     1
+#define GEN6_MATH_PRECISION_FULL		0
+#define GEN6_MATH_PRECISION_PARTIAL		1
 
-#define GEN6_MATH_SATURATE_NONE         0
-#define GEN6_MATH_SATURATE_SATURATE     1
+#define GEN6_MATH_SATURATE_NONE			0
+#define GEN6_MATH_SATURATE_SATURATE		1
 
-#define GEN6_MATH_DATA_VECTOR  0
-#define GEN6_MATH_DATA_SCALAR  1
+#define GEN6_MATH_DATA_VECTOR			0
+#define GEN6_MATH_DATA_SCALAR			1
 
-#define GEN6_URB_OPCODE_WRITE  0
+#define GEN6_URB_OPCODE_WRITE			0
 
-#define GEN6_URB_SWIZZLE_NONE          0
-#define GEN6_URB_SWIZZLE_INTERLEAVE    1
-#define GEN6_URB_SWIZZLE_TRANSPOSE     2
+#define GEN6_URB_SWIZZLE_NONE			0
+#define GEN6_URB_SWIZZLE_INTERLEAVE		1
+#define GEN6_URB_SWIZZLE_TRANSPOSE		2
 
-#define GEN6_SCRATCH_SPACE_SIZE_1K     0
-#define GEN6_SCRATCH_SPACE_SIZE_2K     1
-#define GEN6_SCRATCH_SPACE_SIZE_4K     2
-#define GEN6_SCRATCH_SPACE_SIZE_8K     3
-#define GEN6_SCRATCH_SPACE_SIZE_16K    4
-#define GEN6_SCRATCH_SPACE_SIZE_32K    5
-#define GEN6_SCRATCH_SPACE_SIZE_64K    6
-#define GEN6_SCRATCH_SPACE_SIZE_128K   7
-#define GEN6_SCRATCH_SPACE_SIZE_256K   8
-#define GEN6_SCRATCH_SPACE_SIZE_512K   9
-#define GEN6_SCRATCH_SPACE_SIZE_1M     10
-#define GEN6_SCRATCH_SPACE_SIZE_2M     11
+#define GEN6_SCRATCH_SPACE_SIZE_1K		0
+#define GEN6_SCRATCH_SPACE_SIZE_2K		1
+#define GEN6_SCRATCH_SPACE_SIZE_4K		2
+#define GEN6_SCRATCH_SPACE_SIZE_8K		3
+#define GEN6_SCRATCH_SPACE_SIZE_16K		4
+#define GEN6_SCRATCH_SPACE_SIZE_32K		5
+#define GEN6_SCRATCH_SPACE_SIZE_64K		6
+#define GEN6_SCRATCH_SPACE_SIZE_128K		7
+#define GEN6_SCRATCH_SPACE_SIZE_256K		8
+#define GEN6_SCRATCH_SPACE_SIZE_512K		9
+#define GEN6_SCRATCH_SPACE_SIZE_1M		10
+#define GEN6_SCRATCH_SPACE_SIZE_2M		11
 
 /* The hardware supports two different modes for border color. The
  * default (OpenGL) mode uses floating-point color channels, while the
-- 
2.9.5

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] lib/gen6_render: Refactoring lib
  2018-04-10 10:35 ` [igt-dev] [PATCH i-g-t] " Katarzyna Dec
@ 2018-04-10 11:29   ` Kalamarz, Lukasz
  0 siblings, 0 replies; 28+ messages in thread
From: Kalamarz, Lukasz @ 2018-04-10 11:29 UTC (permalink / raw)
  To: Dec, Katarzyna, igt-dev@lists.freedesktop.org

On Tue, 2018-04-10 at 12:35 +0200, Katarzyna Dec wrote:
> On Mon, Apr 09, 2018 at 05:42:29PM +0200, Lukasz Kalamarz wrote:
> > This patch is starting a series of refactoring changes for *render*
> > libs. A lot of code in those libraries is copy/pasted and renamed
> > for
> > different gen.
> > 
> > Changes made in this patch:
> > - removal of duplicated registers definitions
> > - move field definitions above it register definition
> > - move definitions of register into ascending order
> > - unify spaces between register name and it's address/value
> > 
> > Signed-off-by: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
> > Cc: Katarzyna Dec <katarzyna.dec@intel.com>
> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> > +#define GEN6_REGISTER_TYPE_UB			4
> > +#define GEN6_REGISTER_TYPE_B			5
> > +#define GEN6_REGISTER_TYPE_VF			5	/*
> > packed float vector, immediates only? */
> > +#define GEN6_REGISTER_TYPE_HF			6
> > +#define GEN6_REGISTER_TYPE_V			6	/*
> > packed int vector, immediates only, uword dest only */
> > +#define GEN6_REGISTER_TYPE_F			7
> > 
> 
> Generally looks good. There are a few style issues: overly long lines
> (like the one above),
> mixed tabs and spaces, etc. Use checkpatch to see what needs to be
> changed.

Fixed in next version of patch.
> I think that these changes can be introduced in another patch, so it
> will be easier
> to review.

Since similar changes were introduced in v1, I simply updated it with
your suggestion.
--
Lukasz
> 
> Kasia
> > -- 
> > 2.9.5
> > 
> 
> _______________________________________________
> igt-dev mailing list
> igt-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/igt-dev
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [igt-dev] [PATCH i-g-t v5 0/4] Refactoring of *_fill libraries
  2018-04-09 15:42 [igt-dev] [PATCH i-g-t] lib/gen6_render: Refactoring lib Lukasz Kalamarz
                   ` (8 preceding siblings ...)
  2018-04-10 11:28 ` [igt-dev] [PATCH i-g-t v2] lib/gen6_render: Refactoring lib Lukasz Kalamarz
@ 2018-04-10 11:34 ` Katarzyna Dec
  2018-04-10 11:34 ` [igt-dev] [PATCH i-g-t v5 1/4] lib: Move common gpgpu/media fill functions to gpu_fill library Katarzyna Dec
                   ` (5 subsequent siblings)
  15 siblings, 0 replies; 28+ messages in thread
From: Katarzyna Dec @ 2018-04-10 11:34 UTC (permalink / raw)
  To: igt-dev

This series removes duplications in the gpgpu_fill and media_fill
libraries. As a first step, I moved the gpgpu and media helper functions
to the gpu_fill library. In the second patch, I adjusted the code to our
coding style. In the third, non-obvious duplications were removed (e.g. by
adding conditions for future gens to the gen7 functions). The last patch
adds missing parameters whose absence makes the GPU hang on gen9 and gen9+.

In the first version of this series there was a comment about moving
the batch_alloc/copy etc. functions to the intel_batchbuffer library.
Because there is already a lot of code to review, this change will
be introduced in another series (rendercopy, media_fill, gpgpu_fill
and media_spin code is affected by this).

It is possible that more changes around gen*_media.h and media_spin
are needed, but this will be done as a next step.

v2: Removed non-obvious duplications. Adjusted code according to review comments.
v3: The series needed reorganization because it introduced a bug on ALP
that was hard to find. That is why patch 1 now almost exclusively moves
functions to gpu_fill and removes duplicates, such as identical
functions. Also applied comments from review.
v4: Added #defines and copyrights to the new gpu_fill library. Changed the
function order in the gpu_fill library.
v5: No changes compared to v4 - git send-email sent the
series to the wrong thread...

Signed-off-by: Katarzyna Dec <katarzyna.dec@intel.com>
Cc: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>


Katarzyna Dec (4):
  lib: Move common gpgpu/media fill functions to gpu_fill library
  lib: Remove duplications in gpu_fill library
  lib/gpgpu_fill: Add missing configuration parameters for gpgpu_fill
  lib: Adjust refactored gpu_fill library to our coding style

 lib/Makefile.sources    |   3 +-
 lib/gpgpu_fill.c        | 600 ++--------------------------------------------
 lib/gpgpu_fill.h        |  12 +-
 lib/gpu_fill.c          | 626 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/gpu_fill.h          | 135 +++++++++++
 lib/intel_batchbuffer.c |   4 +-
 lib/media_fill.h        |  23 +-
 lib/media_fill_gen7.c   | 278 +--------------------
 lib/media_fill_gen8.c   | 305 +----------------------
 lib/media_fill_gen8lp.c | 367 ----------------------------
 lib/media_fill_gen9.c   | 313 +-----------------------
 lib/meson.build         |   2 +-
 12 files changed, 820 insertions(+), 1848 deletions(-)
 create mode 100644 lib/gpu_fill.c
 create mode 100644 lib/gpu_fill.h
 delete mode 100644 lib/media_fill_gen8lp.c

-- 
2.14.3

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [igt-dev] [PATCH i-g-t v5 1/4] lib: Move common gpgpu/media fill functions to gpu_fill library
  2018-04-09 15:42 [igt-dev] [PATCH i-g-t] lib/gen6_render: Refactoring lib Lukasz Kalamarz
                   ` (9 preceding siblings ...)
  2018-04-10 11:34 ` [igt-dev] [PATCH i-g-t v5 0/4] Refactoring of *_fill libraries Katarzyna Dec
@ 2018-04-10 11:34 ` Katarzyna Dec
  2018-04-10 11:34 ` [igt-dev] [PATCH i-g-t v5 2/4] lib: Remove duplications in " Katarzyna Dec
                   ` (4 subsequent siblings)
  15 siblings, 0 replies; 28+ messages in thread
From: Katarzyna Dec @ 2018-04-10 11:34 UTC (permalink / raw)
  To: igt-dev

The gpgpu_fill and media_fill libraries are very similar and many
functions can be shared. I have created the gpu_fill library with
all functions needed for implementing the gpgpu_fill and media_fill
tests for all Gens. For reviewing and debugging purposes, this patch
should only move functions from a few libraries into one, removing
functions that are identical for both media and gpgpu.
Places in the code that required more changes:
  Removing the gen7_fill_gpgpu_kernel function, which is identical to
gen7_fill_media_kernel and conflicts with moving
genX_fill_interface_descriptor, which is the same for media and gpgpu.
  The gen8_fill_media_kernel function is not removed in this patch
(although it is identical to the gen7 version), because this patch
should consist of function movement as much as possible.
  gen8_fill_interface_descriptor was unified for media and gpgpu
by adding the kernel and its size as parameters (these parameters
were missing in the media gen8, gen8lp and gen9 functions).
  gen8_emit_state_base_address was unified; the one for gpgpu was
configured as if it were using indirect state (while we are
using CURBE). I have checked that the media fill version
(OUT_BATCH(0 | BASE_ADDRESS_MODIFY)) works fine on gpgpu gen8 and newer.

v2: Changed code layout. genX_fill_media_kernel was identical to
genX_fill_gpgpu_kernel, so these functions were unified into
gen7_fill_kernel. There were 2 very similar
gen8_emit_state_base_address functions for media and gpgpu, where the one
for gpgpu was configured as if it were using indirect state
(while we are using CURBE). I have checked that the media fill version
works fine in the gpgpu test on Gen8 and unified them.

v3: Made the patch easier to review by moving the changes that unify code
for various gens (which were included in v1) to another patch, leaving only
the most critical code changes.

v4: Added copyrights and #define to gpu_fill.h

Signed-off-by: Katarzyna Dec <katarzyna.dec@intel.com>
Cc: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 lib/Makefile.sources    |   2 +
 lib/gpgpu_fill.c        | 571 +-----------------------------------
 lib/gpu_fill.c          | 758 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/gpu_fill.h          | 167 +++++++++++
 lib/media_fill_gen7.c   | 271 +----------------
 lib/media_fill_gen8.c   | 290 +-----------------
 lib/media_fill_gen8lp.c | 284 +-----------------
 lib/media_fill_gen9.c   | 298 +------------------
 lib/meson.build         |   1 +
 9 files changed, 937 insertions(+), 1705 deletions(-)
 create mode 100644 lib/gpu_fill.c
 create mode 100644 lib/gpu_fill.h

diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index 3d37ef1d..45e65dd7 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -64,6 +64,8 @@ lib_source_list =	 	\
 	media_spin.c		\
 	gpgpu_fill.h		\
 	gpgpu_fill.c		\
+	gpu_fill.h		\
+	gpu_fill.c		\
 	gen7_media.h            \
 	gen8_media.h            \
 	rendercopy_i915.c	\
diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
index 4d98643d..f2765fd6 100644
--- a/lib/gpgpu_fill.c
+++ b/lib/gpgpu_fill.c
@@ -30,10 +30,9 @@
 
 #include "intel_reg.h"
 #include "drmtest.h"
-#include "intel_batchbuffer.h"
-#include "gen7_media.h"
-#include "gen8_media.h"
+
 #include "gpgpu_fill.h"
+#include "gpu_fill.h"
 
 /* shaders/gpgpu/gpgpu_fill.gxa */
 static const uint32_t gen7_gpgpu_kernel[][4] = {
@@ -75,572 +74,6 @@ static const uint32_t gen9_gpgpu_kernel[][4] = {
 	{ 0x07800031, 0x20000a40, 0x06000e00, 0x82000010 },
 };
 
-static uint32_t
-batch_used(struct intel_batchbuffer *batch)
-{
-	return batch->ptr - batch->buffer;
-}
-
-static uint32_t
-batch_align(struct intel_batchbuffer *batch, uint32_t align)
-{
-	uint32_t offset = batch_used(batch);
-	offset = ALIGN(offset, align);
-	batch->ptr = batch->buffer + offset;
-	return offset;
-}
-
-static void *
-batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
-{
-	uint32_t offset = batch_align(batch, align);
-	batch->ptr += size;
-	return memset(batch->buffer + offset, 0, size);
-}
-
-static uint32_t
-batch_offset(struct intel_batchbuffer *batch, void *ptr)
-{
-	return (uint8_t *)ptr - batch->buffer;
-}
-
-static uint32_t
-batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size,
-	   uint32_t align)
-{
-	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
-}
-
-static void
-gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
-{
-	int ret;
-
-	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
-	if (ret == 0)
-		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
-					NULL, 0, 0, 0);
-	igt_assert(ret == 0);
-}
-
-static uint32_t
-gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch, uint8_t color)
-{
-	uint8_t *curbe_buffer;
-	uint32_t offset;
-
-	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
-	offset = batch_offset(batch, curbe_buffer);
-	*curbe_buffer = color;
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_surface_state(struct intel_batchbuffer *batch,
-			struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
-{
-	struct gen7_surface_state *ss;
-	uint32_t write_domain, read_domain, offset;
-	int ret;
-
-	if (is_dst) {
-		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
-	} else {
-		write_domain = 0;
-		read_domain = I915_GEM_DOMAIN_SAMPLER;
-	}
-
-	ss = batch_alloc(batch, sizeof(*ss), 64);
-	offset = batch_offset(batch, ss);
-
-	ss->ss0.surface_type = GEN7_SURFACE_2D;
-	ss->ss0.surface_format = format;
-	ss->ss0.render_cache_read_write = 1;
-
-	if (buf->tiling == I915_TILING_X)
-		ss->ss0.tiled_mode = 2;
-	else if (buf->tiling == I915_TILING_Y)
-		ss->ss0.tiled_mode = 3;
-
-	ss->ss1.base_addr = buf->bo->offset;
-	ret = drm_intel_bo_emit_reloc(batch->bo,
-				batch_offset(batch, ss) + 4,
-				buf->bo, 0,
-				read_domain, write_domain);
-	igt_assert(ret == 0);
-
-	ss->ss2.height = igt_buf_height(buf) - 1;
-	ss->ss2.width  = igt_buf_width(buf) - 1;
-
-	ss->ss3.pitch  = buf->stride - 1;
-
-	ss->ss7.shader_chanel_select_r = 4;
-	ss->ss7.shader_chanel_select_g = 5;
-	ss->ss7.shader_chanel_select_b = 6;
-	ss->ss7.shader_chanel_select_a = 7;
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_surface_state(struct intel_batchbuffer *batch,
-			struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
-{
-	struct gen8_surface_state *ss;
-	uint32_t write_domain, read_domain, offset;
-	int ret;
-
-	if (is_dst) {
-		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
-	} else {
-		write_domain = 0;
-		read_domain = I915_GEM_DOMAIN_SAMPLER;
-	}
-
-	ss = batch_alloc(batch, sizeof(*ss), 64);
-	offset = batch_offset(batch, ss);
-
-	ss->ss0.surface_type = GEN8_SURFACE_2D;
-	ss->ss0.surface_format = format;
-	ss->ss0.render_cache_read_write = 1;
-	ss->ss0.vertical_alignment = 1; /* align 4 */
-	ss->ss0.horizontal_alignment = 1; /* align 4 */
-
-	if (buf->tiling == I915_TILING_X)
-		ss->ss0.tiled_mode = 2;
-	else if (buf->tiling == I915_TILING_Y)
-		ss->ss0.tiled_mode = 3;
-
-	ss->ss8.base_addr = buf->bo->offset;
-
-	ret = drm_intel_bo_emit_reloc(batch->bo,
-				batch_offset(batch, ss) + 8 * 4,
-				buf->bo, 0,
-				read_domain, write_domain);
-	igt_assert_eq(ret, 0);
-
-	ss->ss2.height = igt_buf_height(buf) - 1;
-	ss->ss2.width  = igt_buf_width(buf) - 1;
-	ss->ss3.pitch  = buf->stride - 1;
-
-	ss->ss7.shader_chanel_select_r = 4;
-	ss->ss7.shader_chanel_select_g = 5;
-	ss->ss7.shader_chanel_select_b = 6;
-	ss->ss7.shader_chanel_select_a = 7;
-
-	return offset;
-
-}
-
-static uint32_t
-gen7_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = batch_alloc(batch, 32, 64);
-	offset = batch_offset(batch, binding_table);
-
-	binding_table[0] = gen7_fill_surface_state(batch, dst, GEN7_SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = batch_alloc(batch, 32, 64);
-	offset = batch_offset(batch, binding_table);
-
-	binding_table[0] = gen8_fill_surface_state(batch, dst, GEN8_SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_gpgpu_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size)
-{
-	uint32_t offset;
-
-	offset = batch_copy(batch, kernel, size, 64);
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,
-			       const uint32_t kernel[][4], size_t size)
-{
-	struct gen7_interface_descriptor_data *idd;
-	uint32_t offset;
-	uint32_t binding_table_offset, kernel_offset;
-
-	binding_table_offset = gen7_fill_binding_table(batch, dst);
-	kernel_offset = gen7_fill_gpgpu_kernel(batch, kernel, size);
-
-	idd = batch_alloc(batch, sizeof(*idd), 64);
-	offset = batch_offset(batch, idd);
-
-	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
-
-	idd->desc1.single_program_flow = 1;
-	idd->desc1.floating_point_mode = GEN7_FLOATING_POINT_IEEE_754;
-
-	idd->desc2.sampler_count = 0;      /* 0 samplers used */
-	idd->desc2.sampler_state_pointer = 0;
-
-	idd->desc3.binding_table_entry_count = 0;
-	idd->desc3.binding_table_pointer = (binding_table_offset >> 5);
-
-	idd->desc4.constant_urb_entry_read_offset = 0;
-	idd->desc4.constant_urb_entry_read_length = 1; /* grf 1 */
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,
-			       const uint32_t kernel[][4], size_t size)
-{
-	struct gen8_interface_descriptor_data *idd;
-	uint32_t offset;
-	uint32_t binding_table_offset, kernel_offset;
-
-	binding_table_offset = gen8_fill_binding_table(batch, dst);
-	kernel_offset = gen7_fill_gpgpu_kernel(batch, kernel, size);
-
-	idd = batch_alloc(batch, sizeof(*idd), 64);
-	offset = batch_offset(batch, idd);
-
-	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
-
-	idd->desc2.single_program_flow = 1;
-	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
-
-	idd->desc3.sampler_count = 0;      /* 0 samplers used */
-	idd->desc3.sampler_state_pointer = 0;
-
-	idd->desc4.binding_table_entry_count = 0;
-	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
-
-	idd->desc5.constant_urb_entry_read_offset = 0;
-	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
-
-	return offset;
-}
-
-static void
-gen7_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
-
-	/* general */
-	OUT_BATCH(0);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* general/dynamic/indirect/instruction access Bound */
-	OUT_BATCH(0);
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-}
-
-static void
-gen8_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2));
-
-	/* general */
-	OUT_BATCH(0 | (0x78 << 4) | (0 << 1) |  BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-
-	/* stateless data port */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
-		  0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-	OUT_BATCH(0 );
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* general state buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* dynamic state buffer size */
-	OUT_BATCH(1 << 12 | 1);
-	/* indirect object buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
-	OUT_BATCH(1 << 12 | 1);
-}
-
-static void
-gen9_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (19 - 2));
-
-	/* general */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-
-	/* stateless data port */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
-		0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* general state buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* dynamic state buffer size */
-	OUT_BATCH(1 << 12 | 1);
-	/* indirect object buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
-	OUT_BATCH(1 << 12 | 1);
-
-	/* Bindless surface state base address */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-	OUT_BATCH(0xfffff000);
-}
-
-static void
-gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 | /* max num of threads */
-		  0 << 8 | /* num of URB entry */
-		  1 << 2); /* GPGPU mode */
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(0 << 16 | 	/* URB entry size in 256 bits unit */
-		  1);		/* CURBE entry size in 256 bits unit */
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-static void
-gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 | 1 << 8);
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(0 << 16 | 1);
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-static void
-gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
-{
-	OUT_BATCH(GEN7_MEDIA_CURBE_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* curbe total data length */
-	OUT_BATCH(64);
-	/* curbe data start address, is relative to the dynamics base address */
-	OUT_BATCH(curbe_buffer);
-}
-
-static void
-gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
-	OUT_BATCH(interface_descriptor);
-}
-
-static void
-gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
-	OUT_BATCH(interface_descriptor);
-}
-
-static void
-gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
-		     unsigned x, unsigned y,
-		     unsigned width, unsigned height)
-{
-	uint32_t x_dim, y_dim, tmp, right_mask;
-
-	/*
-	 * Simply do SIMD16 based dispatch, so every thread uses
-	 * SIMD16 channels.
-	 *
-	 * Define our own thread group size, e.g 16x1 for every group, then
-	 * will have 1 thread each group in SIMD16 dispatch. So thread
-	 * width/height/depth are all 1.
-	 *
-	 * Then thread group X = width / 16 (aligned to 16)
-	 * thread group Y = height;
-	 */
-	x_dim = (width + 15) / 16;
-	y_dim = height;
-
-	tmp = width & 15;
-	if (tmp == 0)
-		right_mask = (1 << 16) - 1;
-	else
-		right_mask = (1 << tmp) - 1;
-
-	OUT_BATCH(GEN7_GPGPU_WALKER | 9);
-
-	/* interface descriptor offset */
-	OUT_BATCH(0);
-
-	/* SIMD size, thread w/h/d */
-	OUT_BATCH(1 << 30 | /* SIMD16 */
-		  0 << 16 | /* depth:1 */
-		  0 << 8 | /* height:1 */
-		  0); /* width:1 */
-
-	/* thread group X */
-	OUT_BATCH(0);
-	OUT_BATCH(x_dim);
-
-	/* thread group Y */
-	OUT_BATCH(0);
-	OUT_BATCH(y_dim);
-
-	/* thread group Z */
-	OUT_BATCH(0);
-	OUT_BATCH(1);
-
-	/* right mask */
-	OUT_BATCH(right_mask);
-
-	/* bottom mask, height 1, always 0xffffffff */
-	OUT_BATCH(0xffffffff);
-}
-
-static void
-gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
-		     unsigned x, unsigned y,
-		     unsigned width, unsigned height)
-{
-	uint32_t x_dim, y_dim, tmp, right_mask;
-
-	/*
-	 * Simply do SIMD16 based dispatch, so every thread uses
-	 * SIMD16 channels.
-	 *
-	 * Define our own thread group size, e.g 16x1 for every group, then
-	 * will have 1 thread each group in SIMD16 dispatch. So thread
-	 * width/height/depth are all 1.
-	 *
-	 * Then thread group X = width / 16 (aligned to 16)
-	 * thread group Y = height;
-	 */
-	x_dim = (width + 15) / 16;
-	y_dim = height;
-
-	tmp = width & 15;
-	if (tmp == 0)
-		right_mask = (1 << 16) - 1;
-	else
-		right_mask = (1 << tmp) - 1;
-
-	OUT_BATCH(GEN7_GPGPU_WALKER | 13);
-
-	OUT_BATCH(0); /* kernel offset */
-	OUT_BATCH(0); /* indirect data length */
-	OUT_BATCH(0); /* indirect data offset */
-
-	/* SIMD size, thread w/h/d */
-	OUT_BATCH(1 << 30 | /* SIMD16 */
-		  0 << 16 | /* depth:1 */
-		  0 << 8 | /* height:1 */
-		  0); /* width:1 */
-
-	/* thread group X */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(x_dim);
-
-	/* thread group Y */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(y_dim);
-
-	/* thread group Z */
-	OUT_BATCH(0);
-	OUT_BATCH(1);
-
-	/* right mask */
-	OUT_BATCH(right_mask);
-
-	/* bottom mask, height 1, always 0xffffffff */
-	OUT_BATCH(0xffffffff);
-}
-
 /*
  * This sets up the gpgpu pipeline,
  *
diff --git a/lib/gpu_fill.c b/lib/gpu_fill.c
new file mode 100644
index 00000000..172c6db6
--- /dev/null
+++ b/lib/gpu_fill.c
@@ -0,0 +1,758 @@
+#include "gpu_fill.h"
+
+uint32_t
+batch_used(struct intel_batchbuffer *batch)
+{
+	return batch->ptr - batch->buffer;
+}
+
+uint32_t
+batch_align(struct intel_batchbuffer *batch, uint32_t align)
+{
+	uint32_t offset = batch_used(batch);
+	offset = ALIGN(offset, align);
+	batch->ptr = batch->buffer + offset;
+	return offset;
+}
+
+void *
+batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
+{
+	uint32_t offset = batch_align(batch, align);
+	batch->ptr += size;
+	return memset(batch->buffer + offset, 0, size);
+}
+
+uint32_t
+batch_offset(struct intel_batchbuffer *batch, void *ptr)
+{
+	return (uint8_t *)ptr - batch->buffer;
+}
+
+uint32_t
+batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
+{
+	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
+}
+
+void
+gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
+{
+	int ret;
+
+	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
+	if (ret == 0)
+		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
+					NULL, 0, 0, 0);
+	igt_assert(ret == 0);
+}
+
+uint32_t
+gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
+			uint8_t color)
+{
+	uint8_t *curbe_buffer;
+	uint32_t offset;
+
+	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
+	offset = batch_offset(batch, curbe_buffer);
+	*curbe_buffer = color;
+
+	return offset;
+}
+
+uint32_t
+gen7_fill_surface_state(struct intel_batchbuffer *batch,
+			struct igt_buf *buf,
+			uint32_t format,
+			int is_dst)
+{
+	struct gen7_surface_state *ss;
+	uint32_t write_domain, read_domain, offset;
+	int ret;
+
+	if (is_dst) {
+		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
+	} else {
+		write_domain = 0;
+		read_domain = I915_GEM_DOMAIN_SAMPLER;
+	}
+
+	ss = batch_alloc(batch, sizeof(*ss), 64);
+	offset = batch_offset(batch, ss);
+
+	ss->ss0.surface_type = GEN7_SURFACE_2D;
+	ss->ss0.surface_format = format;
+	ss->ss0.render_cache_read_write = 1;
+
+	if (buf->tiling == I915_TILING_X)
+		ss->ss0.tiled_mode = 2;
+	else if (buf->tiling == I915_TILING_Y)
+		ss->ss0.tiled_mode = 3;
+
+	ss->ss1.base_addr = buf->bo->offset;
+	ret = drm_intel_bo_emit_reloc(batch->bo,
+				batch_offset(batch, ss) + 4,
+				buf->bo, 0,
+				read_domain, write_domain);
+	igt_assert(ret == 0);
+
+	ss->ss2.height = igt_buf_height(buf) - 1;
+	ss->ss2.width  = igt_buf_width(buf) - 1;
+
+	ss->ss3.pitch  = buf->stride - 1;
+
+	ss->ss7.shader_chanel_select_r = 4;
+	ss->ss7.shader_chanel_select_g = 5;
+	ss->ss7.shader_chanel_select_b = 6;
+	ss->ss7.shader_chanel_select_a = 7;
+
+	return offset;
+}
+
+uint32_t
+gen7_fill_binding_table(struct intel_batchbuffer *batch,
+			struct igt_buf *dst)
+{
+	uint32_t *binding_table, offset;
+
+	binding_table = batch_alloc(batch, 32, 64);
+	offset = batch_offset(batch, binding_table);
+	binding_table[0] = gen7_fill_surface_state(batch, dst,
+						GEN7_SURFACEFORMAT_R8_UNORM, 1);
+
+	return offset;
+}
+
+uint32_t
+gen7_fill_media_kernel(struct intel_batchbuffer *batch,
+		const uint32_t kernel[][4],
+		size_t size)
+{
+	uint32_t offset;
+
+	offset = batch_copy(batch, kernel, size, 64);
+
+	return offset;
+}
+
+uint32_t
+gen8_fill_media_kernel(struct intel_batchbuffer *batch,
+		const uint32_t kernel[][4],
+		size_t size)
+{
+	uint32_t offset;
+
+	offset = batch_copy(batch, kernel, size, 64);
+
+	return offset;
+}
+
+uint32_t
+gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,
+			       const uint32_t kernel[][4], size_t size)
+{
+	struct gen7_interface_descriptor_data *idd;
+	uint32_t offset;
+	uint32_t binding_table_offset, kernel_offset;
+
+	binding_table_offset = gen7_fill_binding_table(batch, dst);
+	kernel_offset = gen7_fill_media_kernel(batch, kernel, size);
+
+	idd = batch_alloc(batch, sizeof(*idd), 64);
+	offset = batch_offset(batch, idd);
+
+	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
+
+	idd->desc1.single_program_flow = 1;
+	idd->desc1.floating_point_mode = GEN7_FLOATING_POINT_IEEE_754;
+
+	idd->desc2.sampler_count = 0;      /* 0 samplers used */
+	idd->desc2.sampler_state_pointer = 0;
+
+	idd->desc3.binding_table_entry_count = 0;
+	idd->desc3.binding_table_pointer = (binding_table_offset >> 5);
+
+	idd->desc4.constant_urb_entry_read_offset = 0;
+	idd->desc4.constant_urb_entry_read_length = 1; /* grf 1 */
+
+	return offset;
+}
+
+void
+gen7_emit_state_base_address(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
+
+	/* general */
+	OUT_BATCH(0);
+
+	/* surface */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+
+	/* dynamic */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+
+	/* indirect */
+	OUT_BATCH(0);
+
+	/* instruction */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+
+	/* general/dynamic/indirect/instruction access Bound */
+	OUT_BATCH(0);
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+}
+
+void
+gen7_emit_vfe_state(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
+
+	/* scratch buffer */
+	OUT_BATCH(0);
+
+	/* number of threads & urb entries */
+	OUT_BATCH(1 << 16 |
+		2 << 8);
+
+	OUT_BATCH(0);
+
+	/* urb entry size & curbe size */
+	OUT_BATCH(2 << 16 | 	/* in 256 bits unit */
+		2);		/* in 256 bits unit */
+
+	/* scoreboard */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+void
+gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
+
+	/* scratch buffer */
+	OUT_BATCH(0);
+
+	/* number of threads & urb entries */
+	OUT_BATCH(1 << 16 | /* max num of threads */
+		  0 << 8 | /* num of URB entry */
+		  1 << 2); /* GPGPU mode */
+
+	OUT_BATCH(0);
+
+	/* urb entry size & curbe size */
+	OUT_BATCH(0 << 16 | 	/* URB entry size in 256 bits unit */
+		  1);		/* CURBE entry size in 256 bits unit */
+
+	/* scoreboard */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+void
+gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
+{
+	OUT_BATCH(GEN7_MEDIA_CURBE_LOAD | (4 - 2));
+	OUT_BATCH(0);
+	/* curbe total data length */
+	OUT_BATCH(64);
+	/* curbe data start address, relative to the dynamic state base address */
+	OUT_BATCH(curbe_buffer);
+}
+
+void
+gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
+{
+	OUT_BATCH(GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
+	OUT_BATCH(0);
+	/* interface descriptor data length */
+	OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
+	/* interface descriptor address, relative to the dynamic state base address */
+	OUT_BATCH(interface_descriptor);
+}
+
+void
+gen7_emit_media_objects(struct intel_batchbuffer *batch,
+			unsigned x, unsigned y,
+			unsigned width, unsigned height)
+{
+	int i, j;
+
+	for (i = 0; i < width / 16; i++) {
+		for (j = 0; j < height / 16; j++) {
+			OUT_BATCH(GEN7_MEDIA_OBJECT | (8 - 2));
+
+			/* interface descriptor offset */
+			OUT_BATCH(0);
+
+			/* without indirect data */
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+
+			/* scoreboard */
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+
+			/* inline data (xoffset, yoffset) */
+			OUT_BATCH(x + i * 16);
+			OUT_BATCH(y + j * 16);
+		}
+	}
+}
+
+void
+gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
+		     unsigned x, unsigned y,
+		     unsigned width, unsigned height)
+{
+	uint32_t x_dim, y_dim, tmp, right_mask;
+
+	/*
+	 * Simply do SIMD16 based dispatch, so every thread uses
+	 * SIMD16 channels.
+	 *
+	 * Define our own thread group size, e.g. 16x1 for every group, so
+	 * each group has 1 thread in SIMD16 dispatch and the thread
+	 * width/height/depth are all 1.
+	 *
+	 * Then thread group X = width / 16 (rounded up)
+	 * thread group Y = height;
+	 */
+	x_dim = (width + 15) / 16;
+	y_dim = height;
+
+	tmp = width & 15;
+	if (tmp == 0)
+		right_mask = (1 << 16) - 1;
+	else
+		right_mask = (1 << tmp) - 1;
+
+	OUT_BATCH(GEN7_GPGPU_WALKER | 9);
+
+	/* interface descriptor offset */
+	OUT_BATCH(0);
+
+	/* SIMD size, thread w/h/d */
+	OUT_BATCH(1 << 30 | /* SIMD16 */
+		  0 << 16 | /* depth:1 */
+		  0 << 8 | /* height:1 */
+		  0); /* width:1 */
+
+	/* thread group X */
+	OUT_BATCH(0);
+	OUT_BATCH(x_dim);
+
+	/* thread group Y */
+	OUT_BATCH(0);
+	OUT_BATCH(y_dim);
+
+	/* thread group Z */
+	OUT_BATCH(0);
+	OUT_BATCH(1);
+
+	/* right mask */
+	OUT_BATCH(right_mask);
+
+	/* bottom mask, height 1, always 0xffffffff */
+	OUT_BATCH(0xffffffff);
+}
+
+void
+gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
+{
+	int ret;
+
+	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
+	if (ret == 0)
+		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
+					NULL, 0, 0, 0);
+	igt_assert(ret == 0);
+}
+
+
+uint32_t
+gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
+			uint8_t color)
+{
+	uint8_t *curbe_buffer;
+	uint32_t offset;
+
+	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
+	offset = batch_offset(batch, curbe_buffer);
+	*curbe_buffer = color;
+
+	return offset;
+}
+
+uint32_t
+gen8_fill_surface_state(struct intel_batchbuffer *batch,
+			struct igt_buf *buf,
+			uint32_t format,
+			int is_dst)
+{
+	struct gen8_surface_state *ss;
+	uint32_t write_domain, read_domain, offset;
+	int ret;
+
+	if (is_dst) {
+		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
+	} else {
+		write_domain = 0;
+		read_domain = I915_GEM_DOMAIN_SAMPLER;
+	}
+
+	ss = batch_alloc(batch, sizeof(*ss), 64);
+	offset = batch_offset(batch, ss);
+
+	ss->ss0.surface_type = GEN8_SURFACE_2D;
+	ss->ss0.surface_format = format;
+	ss->ss0.render_cache_read_write = 1;
+	ss->ss0.vertical_alignment = 1; /* align 4 */
+	ss->ss0.horizontal_alignment = 1; /* align 4 */
+
+	if (buf->tiling == I915_TILING_X)
+		ss->ss0.tiled_mode = 2;
+	else if (buf->tiling == I915_TILING_Y)
+		ss->ss0.tiled_mode = 3;
+
+	ss->ss8.base_addr = buf->bo->offset;
+
+	ret = drm_intel_bo_emit_reloc(batch->bo,
+				batch_offset(batch, ss) + 8 * 4,
+				buf->bo, 0,
+				read_domain, write_domain);
+	igt_assert(ret == 0);
+
+	ss->ss2.height = igt_buf_height(buf) - 1;
+	ss->ss2.width  = igt_buf_width(buf) - 1;
+	ss->ss3.pitch  = buf->stride - 1;
+
+	ss->ss7.shader_chanel_select_r = 4;
+	ss->ss7.shader_chanel_select_g = 5;
+	ss->ss7.shader_chanel_select_b = 6;
+	ss->ss7.shader_chanel_select_a = 7;
+
+	return offset;
+}
+
+uint32_t
+gen8_fill_binding_table(struct intel_batchbuffer *batch,
+			struct igt_buf *dst)
+{
+	uint32_t *binding_table, offset;
+
+	binding_table = batch_alloc(batch, 32, 64);
+	offset = batch_offset(batch, binding_table);
+
+	binding_table[0] = gen8_fill_surface_state(batch, dst,
+						GEN8_SURFACEFORMAT_R8_UNORM, 1);
+
+	return offset;
+}
+
+uint32_t
+gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,  const uint32_t kernel[][4], size_t size)
+{
+	struct gen8_interface_descriptor_data *idd;
+	uint32_t offset;
+	uint32_t binding_table_offset, kernel_offset;
+
+	binding_table_offset = gen8_fill_binding_table(batch, dst);
+	kernel_offset = gen8_fill_media_kernel(batch, kernel, size);
+
+	idd = batch_alloc(batch, sizeof(*idd), 64);
+	offset = batch_offset(batch, idd);
+
+	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
+
+	idd->desc2.single_program_flow = 1;
+	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
+
+	idd->desc3.sampler_count = 0;      /* 0 samplers used */
+	idd->desc3.sampler_state_pointer = 0;
+
+	idd->desc4.binding_table_entry_count = 0;
+	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
+
+	idd->desc5.constant_urb_entry_read_offset = 0;
+	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
+
+	return offset;
+}
+
+void
+gen8_emit_state_base_address(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2));
+
+	/* general */
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
+
+	/* stateless data port */
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+
+	/* surface */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
+
+	/* dynamic */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
+		0, BASE_ADDRESS_MODIFY);
+
+	/* indirect */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	/* instruction */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+
+	/* general state buffer size */
+	OUT_BATCH(0xfffff000 | 1);
+	/* dynamic state buffer size */
+	OUT_BATCH(1 << 12 | 1);
+	/* indirect object buffer size */
+	OUT_BATCH(0xfffff000 | 1);
+	/* instruction buffer size; the modify enable bit must be set, otherwise it may result in a GPU hang */
+	OUT_BATCH(1 << 12 | 1);
+}
+
+void
+gen8_emit_vfe_state(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
+
+	/* scratch buffer */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	/* number of threads & urb entries */
+	OUT_BATCH(1 << 16 |
+		2 << 8);
+
+	OUT_BATCH(0);
+
+	/* urb entry size & curbe size */
+	OUT_BATCH(2 << 16 |
+		2);
+
+	/* scoreboard */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+void
+gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
+
+	/* scratch buffer */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	/* number of threads & urb entries */
+	OUT_BATCH(1 << 16 | 1 << 8);
+
+	OUT_BATCH(0);
+
+	/* urb entry size & curbe size */
+	OUT_BATCH(0 << 16 | 1);
+
+	/* scoreboard */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+void
+gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
+{
+	OUT_BATCH(GEN8_MEDIA_CURBE_LOAD | (4 - 2));
+	OUT_BATCH(0);
+	/* curbe total data length */
+	OUT_BATCH(64);
+	/* curbe data start address, relative to the dynamic state base address */
+	OUT_BATCH(curbe_buffer);
+}
+
+void
+gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
+{
+	OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
+	OUT_BATCH(0);
+	/* interface descriptor data length */
+	OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
+	/* interface descriptor address, relative to the dynamic state base address */
+	OUT_BATCH(interface_descriptor);
+}
+
+void
+gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_MEDIA_STATE_FLUSH | (2 - 2));
+	OUT_BATCH(0);
+}
+
+void
+gen8_emit_media_objects(struct intel_batchbuffer *batch,
+			unsigned x, unsigned y,
+			unsigned width, unsigned height)
+{
+	int i, j;
+
+	for (i = 0; i < width / 16; i++) {
+		for (j = 0; j < height / 16; j++) {
+			OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
+
+			/* interface descriptor offset */
+			OUT_BATCH(0);
+
+			/* without indirect data */
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+
+			/* scoreboard */
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+
+			/* inline data (xoffset, yoffset) */
+			OUT_BATCH(x + i * 16);
+			OUT_BATCH(y + j * 16);
+			gen8_emit_media_state_flush(batch);
+		}
+	}
+}
+void
+gen8lp_emit_media_objects(struct intel_batchbuffer *batch,
+			unsigned x, unsigned y,
+			unsigned width, unsigned height)
+{
+	int i, j;
+
+	for (i = 0; i < width / 16; i++) {
+		for (j = 0; j < height / 16; j++) {
+			OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
+
+			/* interface descriptor offset */
+			OUT_BATCH(0);
+
+			/* without indirect data */
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+
+			/* scoreboard */
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+
+			/* inline data (xoffset, yoffset) */
+			OUT_BATCH(x + i * 16);
+			OUT_BATCH(y + j * 16);
+		}
+	}
+}
+void
+gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
+		     unsigned x, unsigned y,
+		     unsigned width, unsigned height)
+{
+	uint32_t x_dim, y_dim, tmp, right_mask;
+
+	/*
+	 * Simply do SIMD16 based dispatch, so every thread uses
+	 * SIMD16 channels.
+	 *
+	 * Define our own thread group size, e.g. 16x1 for every group, so
+	 * each group has 1 thread in SIMD16 dispatch and the thread
+	 * width/height/depth are all 1.
+	 *
+	 * Then thread group X = width / 16 (rounded up)
+	 * thread group Y = height;
+	 */
+	x_dim = (width + 15) / 16;
+	y_dim = height;
+
+	tmp = width & 15;
+	if (tmp == 0)
+		right_mask = (1 << 16) - 1;
+	else
+		right_mask = (1 << tmp) - 1;
+
+	OUT_BATCH(GEN7_GPGPU_WALKER | 13);
+
+	OUT_BATCH(0); /* kernel offset */
+	OUT_BATCH(0); /* indirect data length */
+	OUT_BATCH(0); /* indirect data offset */
+
+	/* SIMD size, thread w/h/d */
+	OUT_BATCH(1 << 30 | /* SIMD16 */
+		  0 << 16 | /* depth:1 */
+		  0 << 8 | /* height:1 */
+		  0); /* width:1 */
+
+	/* thread group X */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(x_dim);
+
+	/* thread group Y */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(y_dim);
+
+	/* thread group Z */
+	OUT_BATCH(0);
+	OUT_BATCH(1);
+
+	/* right mask */
+	OUT_BATCH(right_mask);
+
+	/* bottom mask, height 1, always 0xffffffff */
+	OUT_BATCH(0xffffffff);
+}
+
+void
+gen9_emit_state_base_address(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (19 - 2));
+
+	/* general */
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
+
+	/* stateless data port */
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+
+	/* surface */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
+
+	/* dynamic */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
+		0, BASE_ADDRESS_MODIFY);
+
+	/* indirect */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	/* instruction */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+
+	/* general state buffer size */
+	OUT_BATCH(0xfffff000 | 1);
+	/* dynamic state buffer size */
+	OUT_BATCH(1 << 12 | 1);
+	/* indirect object buffer size */
+	OUT_BATCH(0xfffff000 | 1);
+	/* instruction buffer size; the modify enable bit must be set, otherwise it may result in a GPU hang */
+	OUT_BATCH(1 << 12 | 1);
+
+	/* Bindless surface state base address */
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
+	OUT_BATCH(0xfffff000);
+}
diff --git a/lib/gpu_fill.h b/lib/gpu_fill.h
new file mode 100644
index 00000000..87e62c86
--- /dev/null
+++ b/lib/gpu_fill.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef GPU_FILL_H
+#define GPU_FILL_H
+
+#include <intel_bufmgr.h>
+#include <i915_drm.h>
+
+#include "media_fill.h"
+#include "gen7_media.h"
+#include "gen8_media.h"
+#include "intel_reg.h"
+#include "drmtest.h"
+#include "intel_batchbuffer.h"
+#include "intel_chipset.h"
+#include <assert.h>
+
+uint32_t
+batch_used(struct intel_batchbuffer *batch);
+
+uint32_t
+batch_align(struct intel_batchbuffer *batch, uint32_t align);
+
+void *
+batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align);
+
+uint32_t
+batch_offset(struct intel_batchbuffer *batch, void *ptr);
+
+uint32_t
+batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align);
+
+void
+gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end);
+
+uint32_t
+gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
+			uint8_t color);
+
+uint32_t
+gen7_fill_surface_state(struct intel_batchbuffer *batch,
+			struct igt_buf *buf,
+			uint32_t format,
+			int is_dst);
+
+uint32_t
+gen7_fill_binding_table(struct intel_batchbuffer *batch,
+			struct igt_buf *dst);
+
+uint32_t
+gen7_fill_media_kernel(struct intel_batchbuffer *batch,
+		const uint32_t kernel[][4],
+		size_t size);
+
+uint32_t
+gen8_fill_media_kernel(struct intel_batchbuffer *batch,
+		const uint32_t kernel[][4],
+		size_t size);
+
+uint32_t
+gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,
+			       const uint32_t kernel[][4], size_t size);
+
+void
+gen7_emit_state_base_address(struct intel_batchbuffer *batch);
+
+void
+gen7_emit_vfe_state(struct intel_batchbuffer *batch);
+
+void
+gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
+
+void
+gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer);
+
+void
+gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor);
+
+void
+gen7_emit_media_objects(struct intel_batchbuffer *batch,
+			unsigned x, unsigned y,
+			unsigned width, unsigned height);
+
+void
+gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
+		     unsigned x, unsigned y,
+		     unsigned width, unsigned height);
+
+void
+gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end);
+
+uint32_t
+gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
+			uint8_t color);
+
+uint32_t
+gen8_fill_surface_state(struct intel_batchbuffer *batch,
+			struct igt_buf *buf,
+			uint32_t format,
+			int is_dst);
+
+uint32_t
+gen8_fill_binding_table(struct intel_batchbuffer *batch,
+			struct igt_buf *dst);
+
+uint32_t
+gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,  const uint32_t kernel[][4], size_t size);
+
+void
+gen8_emit_state_base_address(struct intel_batchbuffer *batch);
+
+void
+gen8_emit_vfe_state(struct intel_batchbuffer *batch);
+
+void
+gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
+
+void
+gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer);
+
+void
+gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor);
+
+void
+gen8_emit_media_state_flush(struct intel_batchbuffer *batch);
+
+void
+gen8_emit_media_objects(struct intel_batchbuffer *batch,
+			unsigned x, unsigned y,
+			unsigned width, unsigned height);
+
+void
+gen8lp_emit_media_objects(struct intel_batchbuffer *batch,
+			unsigned x, unsigned y,
+			unsigned width, unsigned height);
+
+void
+gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
+		     unsigned x, unsigned y,
+		     unsigned width, unsigned height);
+
+void
+gen9_emit_state_base_address(struct intel_batchbuffer *batch);
+
+#endif /* GPU_FILL_H */
diff --git a/lib/media_fill_gen7.c b/lib/media_fill_gen7.c
index 6fb44798..c97555a6 100644
--- a/lib/media_fill_gen7.c
+++ b/lib/media_fill_gen7.c
@@ -5,7 +5,7 @@
 #include "gen7_media.h"
 #include "intel_reg.h"
 #include "drmtest.h"
-
+#include "gpu_fill.h"
 #include <assert.h>
 
 static const uint32_t media_kernel[][4] = {
@@ -22,275 +22,6 @@ static const uint32_t media_kernel[][4] = {
 	{ 0x07800031, 0x20001ca8, 0x00000e00, 0x82000010 },
 };
 
-static uint32_t
-batch_used(struct intel_batchbuffer *batch)
-{
-	return batch->ptr - batch->buffer;
-}
-
-static uint32_t
-batch_align(struct intel_batchbuffer *batch, uint32_t align)
-{
-	uint32_t offset = batch_used(batch);
-	offset = ALIGN(offset, align);
-	batch->ptr = batch->buffer + offset;
-	return offset;
-}
-
-static void *
-batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
-{
-	uint32_t offset = batch_align(batch, align);
-	batch->ptr += size;
-	return memset(batch->buffer + offset, 0, size);
-}
-
-static uint32_t
-batch_offset(struct intel_batchbuffer *batch, void *ptr)
-{
-	return (uint8_t *)ptr - batch->buffer;
-}
-
-static uint32_t
-batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
-{
-	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
-}
-
-static void
-gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
-{
-	int ret;
-
-	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
-	if (ret == 0)
-		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
-					NULL, 0, 0, 0);
-	igt_assert(ret == 0);
-}
-
-static uint32_t
-gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			uint8_t color)
-{
-	uint8_t *curbe_buffer;
-	uint32_t offset;
-
-	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
-	offset = batch_offset(batch, curbe_buffer);
-	*curbe_buffer = color;
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_surface_state(struct intel_batchbuffer *batch,
-			struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
-{
-	struct gen7_surface_state *ss;
-	uint32_t write_domain, read_domain, offset;
-	int ret;
-
-	if (is_dst) {
-		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
-	} else {
-		write_domain = 0;
-		read_domain = I915_GEM_DOMAIN_SAMPLER;
-	}
-
-	ss = batch_alloc(batch, sizeof(*ss), 64);
-	offset = batch_offset(batch, ss);
-
-	ss->ss0.surface_type = GEN7_SURFACE_2D;
-	ss->ss0.surface_format = format;
-	ss->ss0.render_cache_read_write = 1;
-
-	if (buf->tiling == I915_TILING_X)
-		ss->ss0.tiled_mode = 2;
-	else if (buf->tiling == I915_TILING_Y)
-		ss->ss0.tiled_mode = 3;
-
-	ss->ss1.base_addr = buf->bo->offset;
-	ret = drm_intel_bo_emit_reloc(batch->bo,
-				batch_offset(batch, ss) + 4,
-				buf->bo, 0,
-				read_domain, write_domain);
-	igt_assert(ret == 0);
-
-	ss->ss2.height = igt_buf_height(buf) - 1;
-	ss->ss2.width  = igt_buf_width(buf) - 1;
-
-	ss->ss3.pitch  = buf->stride - 1;
-
-	ss->ss7.shader_chanel_select_r = 4;
-	ss->ss7.shader_chanel_select_g = 5;
-	ss->ss7.shader_chanel_select_b = 6;
-	ss->ss7.shader_chanel_select_a = 7;
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = batch_alloc(batch, 32, 64);
-	offset = batch_offset(batch, binding_table);
-
-	binding_table[0] = gen7_fill_surface_state(batch, dst, GEN7_SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_media_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size)
-{
-	uint32_t offset;
-
-	offset = batch_copy(batch, kernel, size, 64);
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,
-			       const uint32_t kernel[][4], size_t size)
-{
-	struct gen7_interface_descriptor_data *idd;
-	uint32_t offset;
-	uint32_t binding_table_offset, kernel_offset;
-
-	binding_table_offset = gen7_fill_binding_table(batch, dst);
-	kernel_offset = gen7_fill_media_kernel(batch, kernel, size);
-
-	idd = batch_alloc(batch, sizeof(*idd), 64);
-	offset = batch_offset(batch, idd);
-
-	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
-
-	idd->desc1.single_program_flow = 1;
-	idd->desc1.floating_point_mode = GEN7_FLOATING_POINT_IEEE_754;
-
-	idd->desc2.sampler_count = 0;      /* 0 samplers used */
-	idd->desc2.sampler_state_pointer = 0;
-
-	idd->desc3.binding_table_entry_count = 0;
-	idd->desc3.binding_table_pointer = (binding_table_offset >> 5);
-
-	idd->desc4.constant_urb_entry_read_offset = 0;
-	idd->desc4.constant_urb_entry_read_length = 1; /* grf 1 */
-
-	return offset;
-}
-
-static void
-gen7_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
-
-	/* general */
-	OUT_BATCH(0);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* general/dynamic/indirect/instruction access Bound */
-	OUT_BATCH(0);
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-}
-
-static void
-gen7_emit_vfe_state(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 |
-		2 << 8);
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(2 << 16 | 	/* in 256 bits unit */
-		2);		/* in 256 bits unit */
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-static void
-gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
-{
-	OUT_BATCH(GEN7_MEDIA_CURBE_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* curbe total data length */
-	OUT_BATCH(64);
-	/* curbe data start address, is relative to the dynamics base address */
-	OUT_BATCH(curbe_buffer);
-}
-
-static void
-gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
-	OUT_BATCH(interface_descriptor);
-}
-
-static void
-gen7_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height)
-{
-	int i, j;
-
-	for (i = 0; i < width / 16; i++) {
-		for (j = 0; j < height / 16; j++) {
-			OUT_BATCH(GEN7_MEDIA_OBJECT | (8 - 2));
-
-			/* interface descriptor offset */
-			OUT_BATCH(0);
-
-			/* without indirect data */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* scoreboard */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* inline data (xoffset, yoffset) */
-			OUT_BATCH(x + i * 16);
-			OUT_BATCH(y + j * 16);
-		}
-	}
-}
-
 /*
  * This sets up the media pipeline,
  *
diff --git a/lib/media_fill_gen8.c b/lib/media_fill_gen8.c
index 4a8fe5a2..4270997e 100644
--- a/lib/media_fill_gen8.c
+++ b/lib/media_fill_gen8.c
@@ -5,7 +5,7 @@
 #include "gen8_media.h"
 #include "intel_reg.h"
 #include "drmtest.h"
-
+#include "gpu_fill.h"
 #include <assert.h>
 
 
@@ -23,293 +23,7 @@ static const uint32_t media_kernel[][4] = {
 	{ 0x07800031, 0x20000a40, 0x0e000e00, 0x82000010 },
 };
 
-static uint32_t
-batch_used(struct intel_batchbuffer *batch)
-{
-	return batch->ptr - batch->buffer;
-}
-
-static uint32_t
-batch_align(struct intel_batchbuffer *batch, uint32_t align)
-{
-	uint32_t offset = batch_used(batch);
-	offset = ALIGN(offset, align);
-	batch->ptr = batch->buffer + offset;
-	return offset;
-}
-
-static void *
-batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
-{
-	uint32_t offset = batch_align(batch, align);
-	batch->ptr += size;
-	return memset(batch->buffer + offset, 0, size);
-}
-
-static uint32_t
-batch_offset(struct intel_batchbuffer *batch, void *ptr)
-{
-	return (uint8_t *)ptr - batch->buffer;
-}
-
-static uint32_t
-batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
-{
-	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
-}
-
-static void
-gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
-{
-	int ret;
-
-	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
-	if (ret == 0)
-		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
-					NULL, 0, 0, 0);
-	igt_assert(ret == 0);
-}
-
-static uint32_t
-gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			uint8_t color)
-{
-	uint8_t *curbe_buffer;
-	uint32_t offset;
-
-	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
-	offset = batch_offset(batch, curbe_buffer);
-	*curbe_buffer = color;
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_surface_state(struct intel_batchbuffer *batch,
-			struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
-{
-	struct gen8_surface_state *ss;
-	uint32_t write_domain, read_domain, offset;
-	int ret;
-
-	if (is_dst) {
-		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
-	} else {
-		write_domain = 0;
-		read_domain = I915_GEM_DOMAIN_SAMPLER;
-	}
-
-	ss = batch_alloc(batch, sizeof(*ss), 64);
-	offset = batch_offset(batch, ss);
-
-	ss->ss0.surface_type = GEN8_SURFACE_2D;
-	ss->ss0.surface_format = format;
-	ss->ss0.render_cache_read_write = 1;
-	ss->ss0.vertical_alignment = 1; /* align 4 */
-	ss->ss0.horizontal_alignment = 1; /* align 4 */
-
-	if (buf->tiling == I915_TILING_X)
-		ss->ss0.tiled_mode = 2;
-	else if (buf->tiling == I915_TILING_Y)
-		ss->ss0.tiled_mode = 3;
-
-	ss->ss8.base_addr = buf->bo->offset;
-
-	ret = drm_intel_bo_emit_reloc(batch->bo,
-				batch_offset(batch, ss) + 8 * 4,
-				buf->bo, 0,
-				read_domain, write_domain);
-	igt_assert(ret == 0);
-
-	ss->ss2.height = igt_buf_height(buf) - 1;
-	ss->ss2.width  = igt_buf_width(buf) - 1;
-	ss->ss3.pitch  = buf->stride - 1;
-
-	ss->ss7.shader_chanel_select_r = 4;
-	ss->ss7.shader_chanel_select_g = 5;
-	ss->ss7.shader_chanel_select_b = 6;
-	ss->ss7.shader_chanel_select_a = 7;
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = batch_alloc(batch, 32, 64);
-	offset = batch_offset(batch, binding_table);
-
-	binding_table[0] = gen8_fill_surface_state(batch, dst, GEN8_SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_media_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size)
-{
-	uint32_t offset;
-
-	offset = batch_copy(batch, kernel, size, 64);
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst)
-{
-	struct gen8_interface_descriptor_data *idd;
-	uint32_t offset;
-	uint32_t binding_table_offset, kernel_offset;
-
-	binding_table_offset = gen8_fill_binding_table(batch, dst);
-	kernel_offset = gen8_fill_media_kernel(batch, media_kernel, sizeof(media_kernel));
-
-	idd = batch_alloc(batch, sizeof(*idd), 64);
-	offset = batch_offset(batch, idd);
-
-	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
-
-	idd->desc2.single_program_flow = 1;
-	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
-
-	idd->desc3.sampler_count = 0;      /* 0 samplers used */
-	idd->desc3.sampler_state_pointer = 0;
-
-	idd->desc4.binding_table_entry_count = 0;
-	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
-
-	idd->desc5.constant_urb_entry_read_offset = 0;
-	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
-
-	return offset;
-}
-
-static void
-gen8_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2));
-
-	/* general */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-
-	/* stateless data port */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
-		0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* general state buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* dynamic state buffer size */
-	OUT_BATCH(1 << 12 | 1);
-	/* indirect object buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
-	OUT_BATCH(1 << 12 | 1);
-}
-
-static void
-gen8_emit_vfe_state(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 |
-		2 << 8);
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(2 << 16 |
-		2);
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-static void
-gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
-{
-	OUT_BATCH(GEN8_MEDIA_CURBE_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* curbe total data length */
-	OUT_BATCH(64);
-	/* curbe data start address, is relative to the dynamics base address */
-	OUT_BATCH(curbe_buffer);
-}
 
-static void
-gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
-	OUT_BATCH(interface_descriptor);
-}
-
-static void
-gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_STATE_FLUSH | (2 - 2));
-	OUT_BATCH(0);
-}
-
-static void
-gen8_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height)
-{
-	int i, j;
-
-	for (i = 0; i < width / 16; i++) {
-		for (j = 0; j < height / 16; j++) {
-			OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
-
-			/* interface descriptor offset */
-			OUT_BATCH(0);
-
-			/* without indirect data */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* scoreboard */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* inline data (xoffset, yoffset) */
-			OUT_BATCH(x + i * 16);
-			OUT_BATCH(y + j * 16);
-			gen8_emit_media_state_flush(batch);
-		}
-	}
-}
 
 /*
  * This sets up the media pipeline,
@@ -349,7 +63,7 @@ gen8_media_fillfunc(struct intel_batchbuffer *batch,
 	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
 
 	curbe_buffer = gen8_fill_curbe_buffer_data(batch, color);
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst);
+	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
 	/* media pipeline */
diff --git a/lib/media_fill_gen8lp.c b/lib/media_fill_gen8lp.c
index 1f8a4adc..dcc11982 100644
--- a/lib/media_fill_gen8lp.c
+++ b/lib/media_fill_gen8lp.c
@@ -5,7 +5,7 @@
 #include "gen8_media.h"
 #include "intel_reg.h"
 #include "drmtest.h"
-
+#include "gpu_fill.h"
 #include <assert.h>
 
 
@@ -23,286 +23,6 @@ static const uint32_t media_kernel[][4] = {
 	{ 0x07800031, 0x20000a40, 0x0e000e00, 0x82000010 },
 };
 
-static uint32_t
-batch_used(struct intel_batchbuffer *batch)
-{
-	return batch->ptr - batch->buffer;
-}
-
-static uint32_t
-batch_align(struct intel_batchbuffer *batch, uint32_t align)
-{
-	uint32_t offset = batch_used(batch);
-	offset = ALIGN(offset, align);
-	batch->ptr = batch->buffer + offset;
-	return offset;
-}
-
-static void *
-batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
-{
-	uint32_t offset = batch_align(batch, align);
-	batch->ptr += size;
-	return memset(batch->buffer + offset, 0, size);
-}
-
-static uint32_t
-batch_offset(struct intel_batchbuffer *batch, void *ptr)
-{
-	return (uint8_t *)ptr - batch->buffer;
-}
-
-static uint32_t
-batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
-{
-	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
-}
-
-static void
-gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
-{
-	int ret;
-
-	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
-	if (ret == 0)
-		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
-					NULL, 0, 0, 0);
-	igt_assert(ret == 0);
-}
-
-static uint32_t
-gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			uint8_t color)
-{
-	uint8_t *curbe_buffer;
-	uint32_t offset;
-
-	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
-	offset = batch_offset(batch, curbe_buffer);
-	*curbe_buffer = color;
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_surface_state(struct intel_batchbuffer *batch,
-			struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
-{
-	struct gen8_surface_state *ss;
-	uint32_t write_domain, read_domain, offset;
-	int ret;
-
-	if (is_dst) {
-		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
-	} else {
-		write_domain = 0;
-		read_domain = I915_GEM_DOMAIN_SAMPLER;
-	}
-
-	ss = batch_alloc(batch, sizeof(*ss), 64);
-	offset = batch_offset(batch, ss);
-
-	ss->ss0.surface_type = GEN8_SURFACE_2D;
-	ss->ss0.surface_format = format;
-	ss->ss0.render_cache_read_write = 1;
-	ss->ss0.vertical_alignment = 1; /* align 4 */
-	ss->ss0.horizontal_alignment = 1; /* align 4 */
-
-	if (buf->tiling == I915_TILING_X)
-		ss->ss0.tiled_mode = 2;
-	else if (buf->tiling == I915_TILING_Y)
-		ss->ss0.tiled_mode = 3;
-
-	ss->ss8.base_addr = buf->bo->offset;
-
-	ret = drm_intel_bo_emit_reloc(batch->bo,
-				batch_offset(batch, ss) + 8 * 4,
-				buf->bo, 0,
-				read_domain, write_domain);
-	igt_assert(ret == 0);
-
-	ss->ss2.height = igt_buf_height(buf) - 1;
-	ss->ss2.width  = igt_buf_width(buf) - 1;
-	ss->ss3.pitch  = buf->stride - 1;
-
-	ss->ss7.shader_chanel_select_r = 4;
-	ss->ss7.shader_chanel_select_g = 5;
-	ss->ss7.shader_chanel_select_b = 6;
-	ss->ss7.shader_chanel_select_a = 7;
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = batch_alloc(batch, 32, 64);
-	offset = batch_offset(batch, binding_table);
-
-	binding_table[0] = gen8_fill_surface_state(batch, dst, GEN8_SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_media_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size)
-{
-	uint32_t offset;
-
-	offset = batch_copy(batch, kernel, size, 64);
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst)
-{
-	struct gen8_interface_descriptor_data *idd;
-	uint32_t offset;
-	uint32_t binding_table_offset, kernel_offset;
-
-	binding_table_offset = gen8_fill_binding_table(batch, dst);
-	kernel_offset = gen8_fill_media_kernel(batch, media_kernel, sizeof(media_kernel));
-
-	idd = batch_alloc(batch, sizeof(*idd), 64);
-	offset = batch_offset(batch, idd);
-
-	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
-
-	idd->desc2.single_program_flow = 1;
-	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
-
-	idd->desc3.sampler_count = 0;      /* 0 samplers used */
-	idd->desc3.sampler_state_pointer = 0;
-
-	idd->desc4.binding_table_entry_count = 0;
-	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
-
-	idd->desc5.constant_urb_entry_read_offset = 0;
-	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
-
-	return offset;
-}
-
-static void
-gen8_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2));
-
-	/* general */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-
-	/* stateless data port */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
-		0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* general state buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* dynamic state buffer size */
-	OUT_BATCH(1 << 12 | 1);
-	/* indirect object buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
-	OUT_BATCH(1 << 12 | 1);
-}
-
-static void
-gen8_emit_vfe_state(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 |
-		2 << 8);
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(2 << 16 |
-		2);
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-static void
-gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
-{
-	OUT_BATCH(GEN8_MEDIA_CURBE_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* curbe total data length */
-	OUT_BATCH(64);
-	/* curbe data start address, is relative to the dynamics base address */
-	OUT_BATCH(curbe_buffer);
-}
-
-static void
-gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
-	OUT_BATCH(interface_descriptor);
-}
-
-static void
-gen8lp_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height)
-{
-	int i, j;
-
-	for (i = 0; i < width / 16; i++) {
-		for (j = 0; j < height / 16; j++) {
-			OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
-
-			/* interface descriptor offset */
-			OUT_BATCH(0);
-
-			/* without indirect data */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* scoreboard */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* inline data (xoffset, yoffset) */
-			OUT_BATCH(x + i * 16);
-			OUT_BATCH(y + j * 16);
-		}
-	}
-}
-
 /*
  * This sets up the media pipeline,
  *
@@ -341,7 +61,7 @@ gen8lp_media_fillfunc(struct intel_batchbuffer *batch,
 	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
 
 	curbe_buffer = gen8_fill_curbe_buffer_data(batch, color);
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst);
+	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
 	/* media pipeline */
diff --git a/lib/media_fill_gen9.c b/lib/media_fill_gen9.c
index 3fd21819..6accdbe4 100644
--- a/lib/media_fill_gen9.c
+++ b/lib/media_fill_gen9.c
@@ -4,11 +4,9 @@
 #include "media_fill.h"
 #include "gen8_media.h"
 #include "intel_reg.h"
-
+#include "gpu_fill.h"
 #include <assert.h>
 
-#define ALIGN(x, y) (((x) + (y)-1) & ~((y)-1))
-
 static const uint32_t media_kernel[][4] = {
 	{ 0x00400001, 0x20202288, 0x00000020, 0x00000000 },
 	{ 0x00600001, 0x20800208, 0x008d0000, 0x00000000 },
@@ -23,298 +21,6 @@ static const uint32_t media_kernel[][4] = {
 	{ 0x07800031, 0x20000a40, 0x0e000e00, 0x82000010 },
 };
 
-static uint32_t
-batch_used(struct intel_batchbuffer *batch)
-{
-	return batch->ptr - batch->buffer;
-}
-
-static uint32_t
-batch_align(struct intel_batchbuffer *batch, uint32_t align)
-{
-	uint32_t offset = batch_used(batch);
-	offset = ALIGN(offset, align);
-	batch->ptr = batch->buffer + offset;
-	return offset;
-}
-
-static void *
-batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
-{
-	uint32_t offset = batch_align(batch, align);
-	batch->ptr += size;
-	return memset(batch->buffer + offset, 0, size);
-}
-
-static uint32_t
-batch_offset(struct intel_batchbuffer *batch, void *ptr)
-{
-	return (uint8_t *)ptr - batch->buffer;
-}
-
-static uint32_t
-batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
-{
-	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
-}
-
-static void
-gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
-{
-	int ret;
-
-	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
-	if (ret == 0)
-		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
-					NULL, 0, 0, 0);
-	assert(ret == 0);
-}
-
-static uint32_t
-gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			uint8_t color)
-{
-	uint8_t *curbe_buffer;
-	uint32_t offset;
-
-	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
-	offset = batch_offset(batch, curbe_buffer);
-	*curbe_buffer = color;
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_surface_state(struct intel_batchbuffer *batch,
-			struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
-{
-	struct gen8_surface_state *ss;
-	uint32_t write_domain, read_domain, offset;
-	int ret;
-
-	if (is_dst) {
-		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
-	} else {
-		write_domain = 0;
-		read_domain = I915_GEM_DOMAIN_SAMPLER;
-	}
-
-	ss = batch_alloc(batch, sizeof(*ss), 64);
-	offset = batch_offset(batch, ss);
-
-	ss->ss0.surface_type = GEN8_SURFACE_2D;
-	ss->ss0.surface_format = format;
-	ss->ss0.render_cache_read_write = 1;
-	ss->ss0.vertical_alignment = 1; /* align 4 */
-	ss->ss0.horizontal_alignment = 1; /* align 4 */
-
-	if (buf->tiling == I915_TILING_X)
-		ss->ss0.tiled_mode = 2;
-	else if (buf->tiling == I915_TILING_Y)
-		ss->ss0.tiled_mode = 3;
-
-	ss->ss8.base_addr = buf->bo->offset;
-
-	ret = drm_intel_bo_emit_reloc(batch->bo,
-				batch_offset(batch, ss) + 8 * 4,
-				buf->bo, 0,
-				read_domain, write_domain);
-	assert(ret == 0);
-
-	ss->ss2.height = igt_buf_height(buf) - 1;
-	ss->ss2.width  = igt_buf_width(buf) - 1;
-	ss->ss3.pitch  = buf->stride - 1;
-
-	ss->ss7.shader_chanel_select_r = 4;
-	ss->ss7.shader_chanel_select_g = 5;
-	ss->ss7.shader_chanel_select_b = 6;
-	ss->ss7.shader_chanel_select_a = 7;
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = batch_alloc(batch, 32, 64);
-	offset = batch_offset(batch, binding_table);
-
-	binding_table[0] = gen8_fill_surface_state(batch, dst, GEN8_SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_media_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size)
-{
-	uint32_t offset;
-
-	offset = batch_copy(batch, kernel, size, 64);
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst)
-{
-	struct gen8_interface_descriptor_data *idd;
-	uint32_t offset;
-	uint32_t binding_table_offset, kernel_offset;
-
-	binding_table_offset = gen8_fill_binding_table(batch, dst);
-	kernel_offset = gen8_fill_media_kernel(batch, media_kernel, sizeof(media_kernel));
-
-	idd = batch_alloc(batch, sizeof(*idd), 64);
-	offset = batch_offset(batch, idd);
-
-	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
-
-	idd->desc2.single_program_flow = 1;
-	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
-
-	idd->desc3.sampler_count = 0;      /* 0 samplers used */
-	idd->desc3.sampler_state_pointer = 0;
-
-	idd->desc4.binding_table_entry_count = 0;
-	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
-
-	idd->desc5.constant_urb_entry_read_offset = 0;
-	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
-
-	return offset;
-}
-
-static void
-gen9_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (19 - 2));
-
-	/* general */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-
-	/* stateless data port */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
-		0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* general state buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* dynamic state buffer size */
-	OUT_BATCH(1 << 12 | 1);
-	/* indirect object buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
-	OUT_BATCH(1 << 12 | 1);
-
-	/* Bindless surface state base address */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-	OUT_BATCH(0xfffff000);
-}
-
-static void
-gen8_emit_vfe_state(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 |
-		2 << 8);
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(2 << 16 |
-		2);
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-static void
-gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
-{
-	OUT_BATCH(GEN8_MEDIA_CURBE_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* curbe total data length */
-	OUT_BATCH(64);
-	/* curbe data start address, is relative to the dynamics base address */
-	OUT_BATCH(curbe_buffer);
-}
-
-static void
-gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
-	OUT_BATCH(interface_descriptor);
-}
-
-static void
-gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_STATE_FLUSH | (2 - 2));
-	OUT_BATCH(0);
-}
-
-static void
-gen8_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height)
-{
-	int i, j;
-
-	for (i = 0; i < width / 16; i++) {
-		for (j = 0; j < height / 16; j++) {
-			OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
-
-			/* interface descriptor offset */
-			OUT_BATCH(0);
-
-			/* without indirect data */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* scoreboard */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* inline data (xoffset, yoffset) */
-			OUT_BATCH(x + i * 16);
-			OUT_BATCH(y + j * 16);
-			gen8_emit_media_state_flush(batch);
-		}
-	}
-}
 
 /*
  * This sets up the media pipeline,
@@ -354,7 +60,7 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
 	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
 
 	curbe_buffer = gen8_fill_curbe_buffer_data(batch, color);
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst);
+	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
 	assert(batch->ptr < &batch->buffer[4095]);
 
 	/* media pipeline */
diff --git a/lib/meson.build b/lib/meson.build
index b3b8b14a..385e08b9 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -30,6 +30,7 @@ lib_sources = [
 	'media_fill_gen9.c',
 	'media_spin.c',
 	'gpgpu_fill.c',
+	'gpu_fill.c',
 	'rendercopy_i915.c',
 	'rendercopy_i830.c',
 	'rendercopy_gen6.c',
-- 
2.14.3

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [igt-dev] [PATCH i-g-t v5 2/4] lib: Remove duplications in gpu_fill library
  2018-04-09 15:42 [igt-dev] [PATCH i-g-t] lib/gen6_render: Refactoring lib Lukasz Kalamarz
                   ` (10 preceding siblings ...)
  2018-04-10 11:34 ` [igt-dev] [PATCH i-g-t v5 1/4] lib: Move common gpgpu/media fill functions to gpu_fill library Katarzyna Dec
@ 2018-04-10 11:34 ` Katarzyna Dec
  2018-04-10 13:14   ` Kalamarz, Lukasz
  2018-04-10 11:34 ` [igt-dev] [PATCH i-g-t v5 3/4] lib/gpgpu_fill: Add missing configuration parameters for gpgpu_fill Katarzyna Dec
                   ` (3 subsequent siblings)
  15 siblings, 1 reply; 28+ messages in thread
From: Katarzyna Dec @ 2018-04-10 11:34 UTC (permalink / raw)
  To: igt-dev

After moving all functions needed for gpgpu and media fill testing,
there is a lot of duplication which can be removed:
  Library media_fill_gen8lp (the CHT variant of media_fill_gen8) was
removed; media state flush for !CHT was added to gen7_emit_media_objects.
  Many gen8 functions were replaced with their gen7 versions with a devid
parameter (gen7_fill_curbe_load, gen7_emit_interface_descriptor,
gen7_fill_binding_table, gen7_emit_media_objects). Unified fill kernel
function so it is applicable to all gens and both media and gpgpu
(merged gen7_fill_media_kernel and gen8_fill_media_kernel).
  Duplicated constants like GEN8_MEDIA_VFE_STATE, GEN8_MEDIA_CURBE_LOAD,
GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD, GEN8_MEDIA_OBJECT were
replaced by their GEN7 versions. However, these constants were not removed
from the gen8_media.h library, because they are used by other tests
for Gen8+. More refactoring in these gen*_media.h libraries is needed.

It seems that further unification of the *_fillfunc functions would
introduce more confusion in understanding what the tests are doing
and what changed between Gens.

v2: Moved some redundant changes from "Move gpgpu/media fill to gpu_fill..."
to this patch. Applied comments from review.

v3: rebase

Signed-off-by: Katarzyna Dec <katarzyna.dec@intel.com>
Cc: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 lib/Makefile.sources    |   1 -
 lib/gpgpu_fill.c        |   2 +-
 lib/gpu_fill.c          | 172 +++++++-----------------------------------------
 lib/gpu_fill.h          |  38 +----------
 lib/intel_batchbuffer.c |   4 +-
 lib/media_fill.h        |   7 --
 lib/media_fill_gen8.c   |  10 +--
 lib/media_fill_gen8lp.c |  87 ------------------------
 lib/media_fill_gen9.c   |  10 +--
 lib/meson.build         |   1 -
 10 files changed, 39 insertions(+), 293 deletions(-)
 delete mode 100644 lib/media_fill_gen8lp.c

diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index 45e65dd7..9c0150c1 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -58,7 +58,6 @@ lib_source_list =	 	\
 	media_fill.h            \
 	media_fill_gen7.c       \
 	media_fill_gen8.c       \
-	media_fill_gen8lp.c     \
 	media_fill_gen9.c       \
 	media_spin.h		\
 	media_spin.c		\
diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
index f2765fd6..579ce78d 100644
--- a/lib/gpgpu_fill.c
+++ b/lib/gpgpu_fill.c
@@ -180,7 +180,7 @@ gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 	gen8_emit_state_base_address(batch);
 	gen8_emit_vfe_state_gpgpu(batch);
 	gen7_emit_curbe_load(batch, curbe_buffer);
-	gen8_emit_interface_descriptor_load(batch, interface_descriptor);
+	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
 	gen8_emit_gpgpu_walk(batch, x, y, width, height);
 
 	OUT_BATCH(MI_BATCH_BUFFER_END);
diff --git a/lib/gpu_fill.c b/lib/gpu_fill.c
index 172c6db6..7d99dfd9 100644
--- a/lib/gpu_fill.c
+++ b/lib/gpu_fill.c
@@ -118,26 +118,18 @@ gen7_fill_binding_table(struct intel_batchbuffer *batch,
 
 	binding_table = batch_alloc(batch, 32, 64);
 	offset = batch_offset(batch, binding_table);
-	binding_table[0] = gen7_fill_surface_state(batch, dst,
+	if (IS_GEN7(batch->devid))
+		binding_table[0] = gen7_fill_surface_state(batch, dst,
 						GEN7_SURFACEFORMAT_R8_UNORM, 1);
+	else
+		binding_table[0] = gen8_fill_surface_state(batch, dst,
+						GEN8_SURFACEFORMAT_R8_UNORM, 1);
 
 	return offset;
 }
 
 uint32_t
-gen7_fill_media_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size)
-{
-	uint32_t offset;
-
-	offset = batch_copy(batch, kernel, size, 64);
-
-	return offset;
-}
-
-uint32_t
-gen8_fill_media_kernel(struct intel_batchbuffer *batch,
+gen7_fill_kernel(struct intel_batchbuffer *batch,
 		const uint32_t kernel[][4],
 		size_t size)
 {
@@ -157,7 +149,7 @@ gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *
 	uint32_t binding_table_offset, kernel_offset;
 
 	binding_table_offset = gen7_fill_binding_table(batch, dst);
-	kernel_offset = gen7_fill_media_kernel(batch, kernel, size);
+	kernel_offset = gen7_fill_kernel(batch, kernel, size);
 
 	idd = batch_alloc(batch, sizeof(*idd), 64);
 	offset = batch_offset(batch, idd);
@@ -272,7 +264,10 @@ gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t in
 	OUT_BATCH(GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
 	OUT_BATCH(0);
 	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
+	if (IS_GEN7(batch->devid))
+		OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
+	else
+		OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
 	/* interface descriptor address, is relative to the dynamics base address */
 	OUT_BATCH(interface_descriptor);
 }
@@ -302,6 +297,8 @@ gen7_emit_media_objects(struct intel_batchbuffer *batch,
 			/* inline data (xoffset, yoffset) */
 			OUT_BATCH(x + i * 16);
 			OUT_BATCH(y + j * 16);
+			if (AT_LEAST_GEN(batch->devid, 8) && !IS_CHERRYVIEW(batch->devid))
+				gen8_emit_media_state_flush(batch);
 		}
 	}
 }
@@ -363,33 +360,6 @@ gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
 	OUT_BATCH(0xffffffff);
 }
 
-void
-gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
-{
-	int ret;
-
-	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
-	if (ret == 0)
-		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
-					NULL, 0, 0, 0);
-	igt_assert(ret == 0);
-}
-
-
-uint32_t
-gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			uint8_t color)
-{
-	uint8_t *curbe_buffer;
-	uint32_t offset;
-
-	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
-	offset = batch_offset(batch, curbe_buffer);
-	*curbe_buffer = color;
-
-	return offset;
-}
-
 uint32_t
 gen8_fill_surface_state(struct intel_batchbuffer *batch,
 			struct igt_buf *buf,
@@ -441,21 +411,6 @@ gen8_fill_surface_state(struct intel_batchbuffer *batch,
 	return offset;
 }
 
-uint32_t
-gen8_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = batch_alloc(batch, 32, 64);
-	offset = batch_offset(batch, binding_table);
-
-	binding_table[0] = gen8_fill_surface_state(batch, dst,
-						GEN8_SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
 uint32_t
 gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,  const uint32_t kernel[][4], size_t size)
 {
@@ -463,8 +418,8 @@ gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *
 	uint32_t offset;
 	uint32_t binding_table_offset, kernel_offset;
 
-	binding_table_offset = gen8_fill_binding_table(batch, dst);
-	kernel_offset = gen8_fill_media_kernel(batch, kernel, size);
+	binding_table_offset = gen7_fill_binding_table(batch, dst);
+	kernel_offset = gen7_fill_kernel(batch, kernel, size);
 
 	idd = batch_alloc(batch, sizeof(*idd), 64);
 	offset = batch_offset(batch, idd);
@@ -522,10 +477,17 @@ gen8_emit_state_base_address(struct intel_batchbuffer *batch)
 	OUT_BATCH(1 << 12 | 1);
 }
 
+void
+gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_MEDIA_STATE_FLUSH | (2 - 2));
+	OUT_BATCH(0);
+}
+
 void
 gen8_emit_vfe_state(struct intel_batchbuffer *batch)
 {
-	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
+	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (9 - 2));
 
 	/* scratch buffer */
 	OUT_BATCH(0);
@@ -550,7 +512,7 @@ gen8_emit_vfe_state(struct intel_batchbuffer *batch)
 void
 gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
 {
-	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
+	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (9 - 2));
 
 	/* scratch buffer */
 	OUT_BATCH(0);
@@ -570,92 +532,6 @@ gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
 	OUT_BATCH(0);
 }
 
-void
-gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
-{
-	OUT_BATCH(GEN8_MEDIA_CURBE_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* curbe total data length */
-	OUT_BATCH(64);
-	/* curbe data start address, is relative to the dynamics base address */
-	OUT_BATCH(curbe_buffer);
-}
-
-void
-gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
-	OUT_BATCH(interface_descriptor);
-}
-
-void
-gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_STATE_FLUSH | (2 - 2));
-	OUT_BATCH(0);
-}
-
-void
-gen8_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height)
-{
-	int i, j;
-
-	for (i = 0; i < width / 16; i++) {
-		for (j = 0; j < height / 16; j++) {
-			OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
-
-			/* interface descriptor offset */
-			OUT_BATCH(0);
-
-			/* without indirect data */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* scoreboard */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* inline data (xoffset, yoffset) */
-			OUT_BATCH(x + i * 16);
-			OUT_BATCH(y + j * 16);
-			gen8_emit_media_state_flush(batch);
-		}
-	}
-}
-void
-gen8lp_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height)
-{
-	int i, j;
-
-	for (i = 0; i < width / 16; i++) {
-		for (j = 0; j < height / 16; j++) {
-			OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
-
-			/* interface descriptor offset */
-			OUT_BATCH(0);
-
-			/* without indirect data */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* scoreboard */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* inline data (xoffset, yoffset) */
-			OUT_BATCH(x + i * 16);
-			OUT_BATCH(y + j * 16);
-		}
-	}
-}
 void
 gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
 		     unsigned x, unsigned y,
diff --git a/lib/gpu_fill.h b/lib/gpu_fill.h
index 87e62c86..072e9f7c 100644
--- a/lib/gpu_fill.h
+++ b/lib/gpu_fill.h
@@ -70,12 +70,7 @@ gen7_fill_binding_table(struct intel_batchbuffer *batch,
 			struct igt_buf *dst);
 
 uint32_t
-gen7_fill_media_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size);
-
-uint32_t
-gen8_fill_media_kernel(struct intel_batchbuffer *batch,
+gen7_fill_kernel(struct intel_batchbuffer *batch,
 		const uint32_t kernel[][4],
 		size_t size);
 
@@ -108,53 +103,26 @@ gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
 		     unsigned x, unsigned y,
 		     unsigned width, unsigned height);
 
-void
-gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end);
-
-uint32_t
-gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			uint8_t color);
-
 uint32_t
 gen8_fill_surface_state(struct intel_batchbuffer *batch,
 			struct igt_buf *buf,
 			uint32_t format,
 			int is_dst);
 
-uint32_t
-gen8_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst);
-
 uint32_t
 gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,  const uint32_t kernel[][4], size_t size);
 
 void
 gen8_emit_state_base_address(struct intel_batchbuffer *batch);
 
-void
-gen8_emit_vfe_state(struct intel_batchbuffer *batch);
-
-void
-gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
-
-void
-gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer);
-
-void
-gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor);
-
 void
 gen8_emit_media_state_flush(struct intel_batchbuffer *batch);
 
 void
-gen8_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height);
+gen8_emit_vfe_state(struct intel_batchbuffer *batch);
 
 void
-gen8lp_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height);
+gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
 
 void
 gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index 7c04ccf3..10d4dce8 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -796,12 +796,10 @@ igt_fillfunc_t igt_get_media_fillfunc(int devid)
 
 	if (IS_GEN9(devid))
 		fill = gen9_media_fillfunc;
-	else if (IS_BROADWELL(devid))
+	else if (IS_GEN8(devid))
 		fill = gen8_media_fillfunc;
 	else if (IS_GEN7(devid))
 		fill = gen7_media_fillfunc;
-	else if (IS_CHERRYVIEW(devid))
-		fill = gen8lp_media_fillfunc;
 
 	return fill;
 }
diff --git a/lib/media_fill.h b/lib/media_fill.h
index 226489cb..161af8cf 100644
--- a/lib/media_fill.h
+++ b/lib/media_fill.h
@@ -18,13 +18,6 @@ gen7_media_fillfunc(struct intel_batchbuffer *batch,
                 unsigned width, unsigned height,
                 uint8_t color);
 
-void
-gen8lp_media_fillfunc(struct intel_batchbuffer *batch,
-		struct igt_buf *dst,
-		unsigned x, unsigned y,
-		unsigned width, unsigned height,
-		uint8_t color);
-
 void
 gen9_media_fillfunc(struct intel_batchbuffer *batch,
                 struct igt_buf *dst,
diff --git a/lib/media_fill_gen8.c b/lib/media_fill_gen8.c
index 4270997e..362abd61 100644
--- a/lib/media_fill_gen8.c
+++ b/lib/media_fill_gen8.c
@@ -62,7 +62,7 @@ gen8_media_fillfunc(struct intel_batchbuffer *batch,
 	/* setup states */
 	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
 
-	curbe_buffer = gen8_fill_curbe_buffer_data(batch, color);
+	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
 	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
@@ -73,17 +73,17 @@ gen8_media_fillfunc(struct intel_batchbuffer *batch,
 
 	gen8_emit_vfe_state(batch);
 
-	gen8_emit_curbe_load(batch, curbe_buffer);
+	gen7_emit_curbe_load(batch, curbe_buffer);
 
-	gen8_emit_interface_descriptor_load(batch, interface_descriptor);
+	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
 
-	gen8_emit_media_objects(batch, x, y, width, height);
+	gen7_emit_media_objects(batch, x, y, width, height);
 
 	OUT_BATCH(MI_BATCH_BUFFER_END);
 
 	batch_end = batch_align(batch, 8);
 	igt_assert(batch_end < BATCH_STATE_SPLIT);
 
-	gen8_render_flush(batch, batch_end);
+	gen7_render_flush(batch, batch_end);
 	intel_batchbuffer_reset(batch);
 }
diff --git a/lib/media_fill_gen8lp.c b/lib/media_fill_gen8lp.c
deleted file mode 100644
index dcc11982..00000000
--- a/lib/media_fill_gen8lp.c
+++ /dev/null
@@ -1,87 +0,0 @@
-#include <intel_bufmgr.h>
-#include <i915_drm.h>
-
-#include "media_fill.h"
-#include "gen8_media.h"
-#include "intel_reg.h"
-#include "drmtest.h"
-#include "gpu_fill.h"
-#include <assert.h>
-
-
-static const uint32_t media_kernel[][4] = {
-	{ 0x00400001, 0x20202288, 0x00000020, 0x00000000 },
-	{ 0x00600001, 0x20800208, 0x008d0000, 0x00000000 },
-	{ 0x00200001, 0x20800208, 0x00450040, 0x00000000 },
-	{ 0x00000001, 0x20880608, 0x00000000, 0x000f000f },
-	{ 0x00800001, 0x20a00208, 0x00000020, 0x00000000 },
-	{ 0x00800001, 0x20e00208, 0x00000020, 0x00000000 },
-	{ 0x00800001, 0x21200208, 0x00000020, 0x00000000 },
-	{ 0x00800001, 0x21600208, 0x00000020, 0x00000000 },
-	{ 0x0c800031, 0x24000a40, 0x0e000080, 0x120a8000 },
-	{ 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 },
-	{ 0x07800031, 0x20000a40, 0x0e000e00, 0x82000010 },
-};
-
-/*
- * This sets up the media pipeline,
- *
- * +---------------+ <---- 4096
- * |       ^       |
- * |       |       |
- * |    various    |
- * |      state    |
- * |       |       |
- * |_______|_______| <---- 2048 + ?
- * |       ^       |
- * |       |       |
- * |   batch       |
- * |    commands   |
- * |       |       |
- * |       |       |
- * +---------------+ <---- 0 + ?
- *
- */
-
-#define BATCH_STATE_SPLIT 2048
-
-void
-gen8lp_media_fillfunc(struct intel_batchbuffer *batch,
-		struct igt_buf *dst,
-		unsigned x, unsigned y,
-		unsigned width, unsigned height,
-		uint8_t color)
-{
-	uint32_t curbe_buffer, interface_descriptor;
-	uint32_t batch_end;
-
-	intel_batchbuffer_flush(batch);
-
-	/* setup states */
-	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
-
-	curbe_buffer = gen8_fill_curbe_buffer_data(batch, color);
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
-	igt_assert(batch->ptr < &batch->buffer[4095]);
-
-	/* media pipeline */
-	batch->ptr = batch->buffer;
-	OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
-	gen8_emit_state_base_address(batch);
-
-	gen8_emit_vfe_state(batch);
-
-	gen8_emit_curbe_load(batch, curbe_buffer);
-
-	gen8_emit_interface_descriptor_load(batch, interface_descriptor);
-
-	gen8lp_emit_media_objects(batch, x, y, width, height);
-
-	OUT_BATCH(MI_BATCH_BUFFER_END);
-
-	batch_end = batch_align(batch, 8);
-	igt_assert(batch_end < BATCH_STATE_SPLIT);
-
-	gen8_render_flush(batch, batch_end);
-	intel_batchbuffer_reset(batch);
-}
diff --git a/lib/media_fill_gen9.c b/lib/media_fill_gen9.c
index 6accdbe4..d1335fe6 100644
--- a/lib/media_fill_gen9.c
+++ b/lib/media_fill_gen9.c
@@ -59,7 +59,7 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
 	/* setup states */
 	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
 
-	curbe_buffer = gen8_fill_curbe_buffer_data(batch, color);
+	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
 	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
 	assert(batch->ptr < &batch->buffer[4095]);
 
@@ -75,11 +75,11 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
 
 	gen8_emit_vfe_state(batch);
 
-	gen8_emit_curbe_load(batch, curbe_buffer);
+	gen7_emit_curbe_load(batch, curbe_buffer);
 
-	gen8_emit_interface_descriptor_load(batch, interface_descriptor);
+	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
 
-	gen8_emit_media_objects(batch, x, y, width, height);
+	gen7_emit_media_objects(batch, x, y, width, height);
 
 	OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
 			GEN9_FORCE_MEDIA_AWAKE_DISABLE |
@@ -93,6 +93,6 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
 	batch_end = batch_align(batch, 8);
 	assert(batch_end < BATCH_STATE_SPLIT);
 
-	gen8_render_flush(batch, batch_end);
+	gen7_render_flush(batch, batch_end);
 	intel_batchbuffer_reset(batch);
 }
diff --git a/lib/meson.build b/lib/meson.build
index 385e08b9..5f2567fb 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -26,7 +26,6 @@ lib_sources = [
 	'ioctl_wrappers.c',
 	'media_fill_gen7.c',
 	'media_fill_gen8.c',
-	'media_fill_gen8lp.c',
 	'media_fill_gen9.c',
 	'media_spin.c',
 	'gpgpu_fill.c',
-- 
2.14.3

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [igt-dev] [PATCH i-g-t v5 3/4] lib/gpgpu_fill: Add missing configuration parameters for gpgpu_fill
  2018-04-09 15:42 [igt-dev] [PATCH i-g-t] lib/gen6_render: Refactoring lib Lukasz Kalamarz
                   ` (11 preceding siblings ...)
  2018-04-10 11:34 ` [igt-dev] [PATCH i-g-t v5 2/4] lib: Remove duplications in " Katarzyna Dec
@ 2018-04-10 11:34 ` Katarzyna Dec
  2018-04-10 13:16   ` Kalamarz, Lukasz
  2018-04-10 11:34 ` [igt-dev] [PATCH i-g-t v5 4/4] lib: Adjust refactored gpu_fill library to our coding style Katarzyna Dec
                   ` (2 subsequent siblings)
  15 siblings, 1 reply; 28+ messages in thread
From: Katarzyna Dec @ 2018-04-10 11:34 UTC (permalink / raw)
  To: igt-dev

Some parameters are missing from the Gen8 configuration of gpgpu_fill,
which causes GPU hangs on newer hardware. We need to set the number of
threads in TG in gen8_fill_interface_descriptor. This field was
previously omitted (apparently without any side effects), but according
to the bspec, from BDW onwards this field cannot be set to 0. We also
need to add the pipeline selection mask in gen9_gpgpu_fillfunc, which
is required from SKL onwards.

v2: rebased on refactored library
v3: Dropped the replacement of gen7_emit_interface_descriptor_load with
the gen8 version in gen9_gpgpu_fillfunc, because the gen8 function was
removed during refactoring.
v4: rebase on series new version

Signed-off-by: Katarzyna Dec <katarzyna.dec@intel.com>
Cc: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 lib/gpgpu_fill.c | 3 ++-
 lib/gpu_fill.c   | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
index 579ce78d..5a77ebd4 100644
--- a/lib/gpgpu_fill.c
+++ b/lib/gpgpu_fill.c
@@ -223,7 +223,8 @@ gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 	batch->ptr = batch->buffer;
 
 	/* GPGPU pipeline */
-	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU);
+	OUT_BATCH(GEN7_PIPELINE_SELECT | GEN9_PIPELINE_SELECTION_MASK |
+		  PIPELINE_SELECT_GPGPU);
 
 	gen9_emit_state_base_address(batch);
 	gen8_emit_vfe_state_gpgpu(batch);
diff --git a/lib/gpu_fill.c b/lib/gpu_fill.c
index 7d99dfd9..102f141b 100644
--- a/lib/gpu_fill.c
+++ b/lib/gpu_fill.c
@@ -438,6 +438,8 @@ gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *
 	idd->desc5.constant_urb_entry_read_offset = 0;
 	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
 
+	idd->desc6.num_threads_in_tg = 1;
+
 	return offset;
 }
 
-- 
2.14.3

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [igt-dev] [PATCH i-g-t v5 4/4] lib: Adjust refactored gpu_fill library to our coding style
  2018-04-09 15:42 [igt-dev] [PATCH i-g-t] lib/gen6_render: Refactoring lib Lukasz Kalamarz
                   ` (12 preceding siblings ...)
  2018-04-10 11:34 ` [igt-dev] [PATCH i-g-t v5 3/4] lib/gpgpu_fill: Add missing configuration parameters for gpgpu_fill Katarzyna Dec
@ 2018-04-10 11:34 ` Katarzyna Dec
  2018-04-10 13:42   ` Katarzyna Dec
  2018-04-10 13:46 ` [igt-dev] ✓ Fi.CI.BAT: success for lib/gen6_render: Refactoring lib (rev4) Patchwork
  2018-04-10 15:04 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
  15 siblings, 1 reply; 28+ messages in thread
From: Katarzyna Dec @ 2018-04-10 11:34 UTC (permalink / raw)
  To: igt-dev

While I am making changes in the gpgpu and media fill area, let's
adjust the code to our coding style.

v2: rebased on the new version of the series (this patch is now the
last one in the series, so the change seems larger)
v3: rebased

Signed-off-by: Katarzyna Dec <katarzyna.dec@intel.com>
Cc: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 lib/gpgpu_fill.c      |  24 ++++-----
 lib/gpgpu_fill.h      |  12 ++---
 lib/gpu_fill.c        | 142 +++++++++++++++++++++++---------------------------
 lib/media_fill.h      |  20 +++----
 lib/media_fill_gen7.c |   7 ++-
 lib/media_fill_gen8.c |   7 +--
 lib/media_fill_gen9.c |   7 +--
 7 files changed, 105 insertions(+), 114 deletions(-)

diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
index 5a77ebd4..72a1445a 100644
--- a/lib/gpgpu_fill.c
+++ b/lib/gpgpu_fill.c
@@ -99,8 +99,8 @@ static const uint32_t gen9_gpgpu_kernel[][4] = {
 void
 gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 		    struct igt_buf *dst,
-		    unsigned x, unsigned y,
-		    unsigned width, unsigned height,
+		    unsigned int x, unsigned int y,
+		    unsigned int width, unsigned int height,
 		    uint8_t color)
 {
 	uint32_t curbe_buffer, interface_descriptor;
@@ -120,8 +120,8 @@ gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
 
 	interface_descriptor = gen7_fill_interface_descriptor(batch, dst,
-							      gen7_gpgpu_kernel,
-							      sizeof(gen7_gpgpu_kernel));
+				gen7_gpgpu_kernel, sizeof(gen7_gpgpu_kernel));
+
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
 	batch->ptr = batch->buffer;
@@ -147,8 +147,8 @@ gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 void
 gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 		    struct igt_buf *dst,
-		    unsigned x, unsigned y,
-		    unsigned width, unsigned height,
+		    unsigned int x, unsigned int y,
+		    unsigned int width, unsigned int height,
 		    uint8_t color)
 {
 	uint32_t curbe_buffer, interface_descriptor;
@@ -168,8 +168,8 @@ gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
 
 	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
-							      gen8_gpgpu_kernel,
-							      sizeof(gen8_gpgpu_kernel));
+				gen8_gpgpu_kernel, sizeof(gen8_gpgpu_kernel));
+
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
 	batch->ptr = batch->buffer;
@@ -195,8 +195,8 @@ gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 void
 gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 		    struct igt_buf *dst,
-		    unsigned x, unsigned y,
-		    unsigned width, unsigned height,
+		    unsigned int x, unsigned int y,
+		    unsigned int width, unsigned int height,
 		    uint8_t color)
 {
 	uint32_t curbe_buffer, interface_descriptor;
@@ -216,8 +216,8 @@ gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
 
 	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
-							      gen9_gpgpu_kernel,
-							      sizeof(gen9_gpgpu_kernel));
+				gen9_gpgpu_kernel, sizeof(gen9_gpgpu_kernel));
+
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
 	batch->ptr = batch->buffer;
diff --git a/lib/gpgpu_fill.h b/lib/gpgpu_fill.h
index 7b5c8322..f0d188ae 100644
--- a/lib/gpgpu_fill.h
+++ b/lib/gpgpu_fill.h
@@ -30,22 +30,22 @@
 void
 gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 		    struct igt_buf *dst,
-		    unsigned x, unsigned y,
-		    unsigned width, unsigned height,
+		    unsigned int x, unsigned int y,
+		    unsigned int width, unsigned int height,
 		    uint8_t color);
 
 void
 gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 		    struct igt_buf *dst,
-		    unsigned x, unsigned y,
-		    unsigned width, unsigned height,
+		    unsigned int x, unsigned int y,
+		    unsigned int width, unsigned int height,
 		    uint8_t color);
 
 void
 gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 		    struct igt_buf *dst,
-		    unsigned x, unsigned y,
-		    unsigned width, unsigned height,
+		    unsigned int x, unsigned int y,
+		    unsigned int width, unsigned int height,
 		    uint8_t color);
 
 #endif /* GPGPU_FILL_H */
diff --git a/lib/gpu_fill.c b/lib/gpu_fill.c
index 102f141b..fc28a945 100644
--- a/lib/gpu_fill.c
+++ b/lib/gpu_fill.c
@@ -10,6 +10,7 @@ uint32_t
 batch_align(struct intel_batchbuffer *batch, uint32_t align)
 {
 	uint32_t offset = batch_used(batch);
+
 	offset = ALIGN(offset, align);
 	batch->ptr = batch->buffer + offset;
 	return offset;
@@ -19,6 +20,7 @@ void *
 batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
 {
 	uint32_t offset = batch_align(batch, align);
+
 	batch->ptr += size;
 	return memset(batch->buffer + offset, 0, size);
 }
@@ -30,9 +32,11 @@ batch_offset(struct intel_batchbuffer *batch, void *ptr)
 }
 
 uint32_t
-batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
+batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size,
+	   uint32_t align)
 {
-	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
+	return batch_offset(batch, memcpy(batch_alloc(batch, size, align),
+			    ptr, size));
 }
 
 void
@@ -43,13 +47,13 @@ gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
 	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
 	if (ret == 0)
 		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
-					NULL, 0, 0, 0);
+					    NULL, 0, 0, 0);
 	igt_assert(ret == 0);
 }
 
 uint32_t
 gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			uint8_t color)
+			    uint8_t color)
 {
 	uint8_t *curbe_buffer;
 	uint32_t offset;
@@ -62,10 +66,8 @@ gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
 }
 
 uint32_t
-gen7_fill_surface_state(struct intel_batchbuffer *batch,
-			struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
+gen7_fill_surface_state(struct intel_batchbuffer *batch, struct igt_buf *buf,
+			uint32_t format, int is_dst)
 {
 	struct gen7_surface_state *ss;
 	uint32_t write_domain, read_domain, offset;
@@ -111,8 +113,7 @@ gen7_fill_surface_state(struct intel_batchbuffer *batch,
 }
 
 uint32_t
-gen7_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
+gen7_fill_binding_table(struct intel_batchbuffer *batch, struct igt_buf *dst)
 {
 	uint32_t *binding_table, offset;
 
@@ -129,9 +130,8 @@ gen7_fill_binding_table(struct intel_batchbuffer *batch,
 }
 
 uint32_t
-gen7_fill_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size)
+gen7_fill_kernel(struct intel_batchbuffer *batch, const uint32_t kernel[][4],
+		 size_t size)
 {
 	uint32_t offset;
 
@@ -141,8 +141,9 @@ gen7_fill_kernel(struct intel_batchbuffer *batch,
 }
 
 uint32_t
-gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,
-			       const uint32_t kernel[][4], size_t size)
+gen7_fill_interface_descriptor(struct intel_batchbuffer *batch,
+			       struct igt_buf *dst, const uint32_t kernel[][4],
+			       size_t size)
 {
 	struct gen7_interface_descriptor_data *idd;
 	uint32_t offset;
@@ -180,16 +181,19 @@ gen7_emit_state_base_address(struct intel_batchbuffer *batch)
 	OUT_BATCH(0);
 
 	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+		  BASE_ADDRESS_MODIFY);
 
 	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+		  BASE_ADDRESS_MODIFY);
 
 	/* indirect */
 	OUT_BATCH(0);
 
 	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+		  BASE_ADDRESS_MODIFY);
 
 	/* general/dynamic/indirect/instruction access Bound */
 	OUT_BATCH(0);
@@ -214,7 +218,7 @@ gen7_emit_vfe_state(struct intel_batchbuffer *batch)
 
 	/* urb entry size & curbe size */
 	OUT_BATCH(2 << 16 | 	/* in 256 bits unit */
-		2);		/* in 256 bits unit */
+		  2);		/* in 256 bits unit */
 
 	/* scoreboard */
 	OUT_BATCH(0);
@@ -268,14 +272,16 @@ gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t in
 		OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
 	else
 		OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
+	/* interface descriptor address, is relative to the dynamics base
+	 * address
+	 */
 	OUT_BATCH(interface_descriptor);
 }
 
 void
 gen7_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height)
+			unsigned int x, unsigned int y,
+			unsigned int width, unsigned int height)
 {
 	int i, j;
 
@@ -297,7 +303,8 @@ gen7_emit_media_objects(struct intel_batchbuffer *batch,
 			/* inline data (xoffset, yoffset) */
 			OUT_BATCH(x + i * 16);
 			OUT_BATCH(y + j * 16);
-			if (AT_LEAST_GEN(batch->devid, 8) && !IS_CHERRYVIEW(batch->devid))
+			if (AT_LEAST_GEN(batch->devid, 8) &&
+			    !IS_CHERRYVIEW(batch->devid))
 				gen8_emit_media_state_flush(batch);
 		}
 	}
@@ -305,8 +312,8 @@ gen7_emit_media_objects(struct intel_batchbuffer *batch,
 
 void
 gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
-		     unsigned x, unsigned y,
-		     unsigned width, unsigned height)
+		     unsigned int x, unsigned int y,
+		     unsigned int width, unsigned int height)
 {
 	uint32_t x_dim, y_dim, tmp, right_mask;
 
@@ -459,8 +466,8 @@ gen8_emit_state_base_address(struct intel_batchbuffer *batch)
 	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
 
 	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
-		0, BASE_ADDRESS_MODIFY);
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER |
+		  I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
 
 	/* indirect */
 	OUT_BATCH(0);
@@ -475,7 +482,9 @@ gen8_emit_state_base_address(struct intel_batchbuffer *batch)
 	OUT_BATCH(1 << 12 | 1);
 	/* indirect object buffer size */
 	OUT_BATCH(0xfffff000 | 1);
-	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
+	/* instruction buffer size, must set modify enable bit, otherwise it
+	 * may result in GPU hang
+	 */
 	OUT_BATCH(1 << 12 | 1);
 }
 
@@ -536,62 +545,41 @@ gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
 
 void
 gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
-		     unsigned x, unsigned y,
-		     unsigned width, unsigned height)
+		     unsigned int x, unsigned int y,
+		     unsigned int width, unsigned int height)
 {
-	uint32_t x_dim, y_dim, tmp, right_mask;
-
-	/*
-	 * Simply do SIMD16 based dispatch, so every thread uses
-	 * SIMD16 channels.
-	 *
-	 * Define our own thread group size, e.g 16x1 for every group, then
-	 * will have 1 thread each group in SIMD16 dispatch. So thread
-	 * width/height/depth are all 1.
-	 *
-	 * Then thread group X = width / 16 (aligned to 16)
-	 * thread group Y = height;
-	 */
-	x_dim = (width + 15) / 16;
-	y_dim = height;
-
-	tmp = width & 15;
-	if (tmp == 0)
-		right_mask = (1 << 16) - 1;
-	else
-		right_mask = (1 << tmp) - 1;
-
-	OUT_BATCH(GEN7_GPGPU_WALKER | 13);
+	/* general */
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
 
-	OUT_BATCH(0); /* kernel offset */
-	OUT_BATCH(0); /* indirect data length */
-	OUT_BATCH(0); /* indirect data offset */
+	/* stateless data port */
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
 
-	/* SIMD size, thread w/h/d */
-	OUT_BATCH(1 << 30 | /* SIMD16 */
-		  0 << 16 | /* depth:1 */
-		  0 << 8 | /* height:1 */
-		  0); /* width:1 */
+	/* surface */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
 
-	/* thread group X */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(x_dim);
+	/* dynamic */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER |
+		  I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
 
-	/* thread group Y */
-	OUT_BATCH(0);
+	/* indirect */
 	OUT_BATCH(0);
-	OUT_BATCH(y_dim);
-
-	/* thread group Z */
 	OUT_BATCH(0);
-	OUT_BATCH(1);
 
-	/* right mask */
-	OUT_BATCH(right_mask);
+	/* instruction */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+		  BASE_ADDRESS_MODIFY);
 
-	/* bottom mask, height 1, always 0xffffffff */
-	OUT_BATCH(0xffffffff);
+	/* general state buffer size */
+	OUT_BATCH(0xfffff000 | 1);
+	/* dynamic state buffer size */
+	OUT_BATCH(1 << 12 | 1);
+	/* indirect object buffer size */
+	OUT_BATCH(0xfffff000 | 1);
+	/* instruction buffer size, must set modify enable bit, otherwise it
+	 * may result in GPU hang
+	 */
+	OUT_BATCH(1 << 12 | 1);
 }
 
 void
@@ -626,7 +614,9 @@ gen9_emit_state_base_address(struct intel_batchbuffer *batch)
 	OUT_BATCH(1 << 12 | 1);
 	/* indirect object buffer size */
 	OUT_BATCH(0xfffff000 | 1);
-	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
+	/* instruction buffer size, must set modify enable bit, otherwise it
+	 * may result in GPU hang
+	 */
 	OUT_BATCH(1 << 12 | 1);
 
 	/* Bindless surface state base address */
diff --git a/lib/media_fill.h b/lib/media_fill.h
index 161af8cf..f6db734e 100644
--- a/lib/media_fill.h
+++ b/lib/media_fill.h
@@ -7,22 +7,22 @@
 void
 gen8_media_fillfunc(struct intel_batchbuffer *batch,
 		struct igt_buf *dst,
-		unsigned x, unsigned y,
-		unsigned width, unsigned height,
+		unsigned int x, unsigned int y,
+		unsigned int width, unsigned int height,
 		uint8_t color);
 
 void
 gen7_media_fillfunc(struct intel_batchbuffer *batch,
-                struct igt_buf *dst,
-                unsigned x, unsigned y,
-                unsigned width, unsigned height,
-                uint8_t color);
+		struct igt_buf *dst,
+		unsigned int x, unsigned int y,
+		unsigned int width, unsigned int height,
+		uint8_t color);
 
 void
 gen9_media_fillfunc(struct intel_batchbuffer *batch,
-                struct igt_buf *dst,
-                unsigned x, unsigned y,
-                unsigned width, unsigned height,
-                uint8_t color);
+		struct igt_buf *dst,
+		unsigned int x, unsigned int y,
+		unsigned int width, unsigned int height,
+		uint8_t color);
 
 #endif /* RENDE_MEDIA_FILL_H */
diff --git a/lib/media_fill_gen7.c b/lib/media_fill_gen7.c
index c97555a6..5a8c32fb 100644
--- a/lib/media_fill_gen7.c
+++ b/lib/media_fill_gen7.c
@@ -47,8 +47,8 @@ static const uint32_t media_kernel[][4] = {
 void
 gen7_media_fillfunc(struct intel_batchbuffer *batch,
 		struct igt_buf *dst,
-		unsigned x, unsigned y,
-		unsigned width, unsigned height,
+		unsigned int x, unsigned int y,
+		unsigned int width, unsigned int height,
 		uint8_t color)
 {
 	uint32_t curbe_buffer, interface_descriptor;
@@ -61,8 +61,7 @@ gen7_media_fillfunc(struct intel_batchbuffer *batch,
 
 	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
 	interface_descriptor = gen7_fill_interface_descriptor(batch, dst,
-							      media_kernel,
-							      sizeof(media_kernel));
+					media_kernel, sizeof(media_kernel));
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
 	/* media pipeline */
diff --git a/lib/media_fill_gen8.c b/lib/media_fill_gen8.c
index 362abd61..d6dd7410 100644
--- a/lib/media_fill_gen8.c
+++ b/lib/media_fill_gen8.c
@@ -50,8 +50,8 @@ static const uint32_t media_kernel[][4] = {
 void
 gen8_media_fillfunc(struct intel_batchbuffer *batch,
 		struct igt_buf *dst,
-		unsigned x, unsigned y,
-		unsigned width, unsigned height,
+		unsigned int x, unsigned int y,
+		unsigned int width, unsigned int height,
 		uint8_t color)
 {
 	uint32_t curbe_buffer, interface_descriptor;
@@ -63,7 +63,8 @@ gen8_media_fillfunc(struct intel_batchbuffer *batch,
 	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
 
 	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
+	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
+					media_kernel, sizeof(media_kernel));
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
 	/* media pipeline */
diff --git a/lib/media_fill_gen9.c b/lib/media_fill_gen9.c
index d1335fe6..a9a829f2 100644
--- a/lib/media_fill_gen9.c
+++ b/lib/media_fill_gen9.c
@@ -47,8 +47,8 @@ static const uint32_t media_kernel[][4] = {
 void
 gen9_media_fillfunc(struct intel_batchbuffer *batch,
 		struct igt_buf *dst,
-		unsigned x, unsigned y,
-		unsigned width, unsigned height,
+		unsigned int x, unsigned int y,
+		unsigned int width, unsigned int height,
 		uint8_t color)
 {
 	uint32_t curbe_buffer, interface_descriptor;
@@ -60,7 +60,8 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
 	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
 
 	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
+	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
+					media_kernel, sizeof(media_kernel));
 	assert(batch->ptr < &batch->buffer[4095]);
 
 	/* media pipeline */
-- 
2.14.3

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [igt-dev] [PATCH i-g-t v5 2/4] lib: Remove duplications in gpu_fill library
  2018-04-10 11:34 ` [igt-dev] [PATCH i-g-t v5 2/4] lib: Remove duplications in " Katarzyna Dec
@ 2018-04-10 13:14   ` Kalamarz, Lukasz
  2018-04-10 13:35     ` Katarzyna Dec
  0 siblings, 1 reply; 28+ messages in thread
From: Kalamarz, Lukasz @ 2018-04-10 13:14 UTC (permalink / raw)
  To: Dec, Katarzyna, igt-dev@lists.freedesktop.org

On Tue, 2018-04-10 at 13:34 +0200, Katarzyna Dec wrote:
> After moving all functions needed for gpgpu and media fill testing
> there is a lot of duplications which can be removed:
>   Library media_fill_gen8 and media_fill_gen8lp for CHT was removed,
> media state flush for !CHT was added to gen7_emit_media_objects.
>   Many gen8 functions were replaced with gen7 version with devid
> parameter (gen7_fill_curbe_load, gen7_emit_interface_descriptor,
> gen7_fill_binding_table, gen7_emit_media_objects). Unified fill
> kernel
> function so it is applicable to all gens and both media and gpgpu
> (merged gen7_fill_media_kernel and gen8_fill_media_kernel).
>   Duplicated constants like GEN8_MEDIA_VFE_STATE,
> GEN8_MEDIA_CURBE_LOAD,
> GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD, GEN8_MEDIA_OBJECT were
> replaced by GEN7 version. However this constants were not removed
> from gen8_media.h library, because they are used by other tests
> for Gen8+. More refactoring in this gen*_media.h libraries is needed.
> 
> It seems that further unification of *_fillfunc functions will
> introduce more confusion in understanding what the tests are doing
> and what were changes between Gens.
> 
> v2: Moved some reduntant changes from Move gpgpu/media fill to
> gpu_fill...
> to this patch. Applied comments from review.
> 
> v3: rebase
> 
> Signed-off-by: Katarzyna Dec <katarzyna.dec@intel.com>
> Cc: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
There are some registers, or their values, which are still duplicated
under different names. I assume that after this series is merged,
you will work on refactoring the header files. If not, then please fix
the entries mentioned below.

Reviewed-by: Lukasz Kalamarz <lukasz.kalamarz@intel.com> 
> ---
> 
GEN7_SURFACE_2D == GEN8_SURFACE_2D
GEN7_SURFACEFORMAT_R8_UNORM == GEN8_SURFACEFORMAT_R8_UNORM
GEN7_FLOATING_POINT_IEEE_754 == GEN8_FLOATING_POINT_IEEE_754
GEN7_STATE_BASE_ADDRESS == GEN8_STATE_BASE_ADDRESS
GEN7_PIPELINE_SELECT == GEN8_PIPELINE_SELECT

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [igt-dev] [PATCH i-g-t v5 3/4] lib/gpgpu_fill: Add missing configuration parameters for gpgpu_fill
  2018-04-10 11:34 ` [igt-dev] [PATCH i-g-t v5 3/4] lib/gpgpu_fill: Add missing configuration parameters for gpgpu_fill Katarzyna Dec
@ 2018-04-10 13:16   ` Kalamarz, Lukasz
  0 siblings, 0 replies; 28+ messages in thread
From: Kalamarz, Lukasz @ 2018-04-10 13:16 UTC (permalink / raw)
  To: Dec, Katarzyna, igt-dev@lists.freedesktop.org

On Tue, 2018-04-10 at 13:34 +0200, Katarzyna Dec wrote:
> There are missing parameters for Gen8 configuration of gpgpu_fill
> that are causing GPU hangs on newer hardware. We need to set the
> number of threads in TG in gen8_fill_interface_descriptor. This
> field was omitted (apparently without any side effects), but
> according to bspec from BDW this field cannot be set to 0. We also
> need to use pipeline selection mask to gen9_gpgpu_fillfunc, which
> is necessary from SKL.
> 
> v2: rebased on refactored library
> v3: Removed replacing gen7_emit_interface_descriptor_load with gen8
> version in gen9_gpgpgu_fillfunc, because during refactoring gen8
> function was removed.
> v4: rebase on series new version
> 
> Signed-off-by: Katarzyna Dec <katarzyna.dec@intel.com>
> Cc: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

Looks good to me.
Reviewed-by: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [igt-dev] [PATCH i-g-t v2] lib/gen6_render: Refactoring lib
  2018-04-10 11:28 ` [igt-dev] [PATCH i-g-t v2] lib/gen6_render: Refactoring lib Lukasz Kalamarz
@ 2018-04-10 13:28   ` Katarzyna Dec
  2018-04-10 14:19   ` [igt-dev] [PATCH i-g-t v6 1/4] lib: Move common gpgpu/media fill functions to gpu_fill library Katarzyna Dec
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 28+ messages in thread
From: Katarzyna Dec @ 2018-04-10 13:28 UTC (permalink / raw)
  To: Lukasz Kalamarz; +Cc: igt-dev

On Tue, Apr 10, 2018 at 01:28:14PM +0200, Lukasz Kalamarz wrote:
> This patch is starting a series of refactoring changes for *render*
> libs. A lot of code in those libraries is copy/pasted and renamed for
> different gen.
> 
> Changes made in this patch:
> - removal of duplicated registers definitions
> - move field definitions above it register definition
> - move definitions of register into ascending order
> - unify spaces between register name and it's address/value
> 
> v2:
> - Fix warnings from check_patch script.
> 
> Signed-off-by: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
> Cc: Katarzyna Dec <katarzyna.dec@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

LGTM :) Nice work!
Reviewed-by: Katarzyna Dec <katarzyna.dec@intel.com> 
> -- 
> 2.9.5
> 
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [igt-dev] [PATCH i-g-t v5 2/4] lib: Remove duplications in gpu_fill library
  2018-04-10 13:14   ` Kalamarz, Lukasz
@ 2018-04-10 13:35     ` Katarzyna Dec
  0 siblings, 0 replies; 28+ messages in thread
From: Katarzyna Dec @ 2018-04-10 13:35 UTC (permalink / raw)
  To: Kalamarz, Lukasz; +Cc: igt-dev

On Tue, Apr 10, 2018 at 02:14:35PM +0100, Kalamarz, Lukasz wrote:
> On Tue, 2018-04-10 at 13:34 +0200, Katarzyna Dec wrote:
> > After moving all functions needed for gpgpu and media fill testing
> > there is a lot of duplications which can be removed:
> >   Library media_fill_gen8 and media_fill_gen8lp for CHT was removed,
> > media state flush for !CHT was added to gen7_emit_media_objects.
> >   Many gen8 functions were replaced with gen7 version with devid
> > parameter (gen7_fill_curbe_load, gen7_emit_interface_descriptor,
> > gen7_fill_binding_table, gen7_emit_media_objects). Unified fill
> > kernel
> > function so it is applicable to all gens and both media and gpgpu
> > (merged gen7_fill_media_kernel and gen8_fill_media_kernel).
> >   Duplicated constants like GEN8_MEDIA_VFE_STATE,
> > GEN8_MEDIA_CURBE_LOAD,
> > GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD, GEN8_MEDIA_OBJECT were
> > replaced by GEN7 version. However this constants were not removed
> > from gen8_media.h library, because they are used by other tests
> > for Gen8+. More refactoring in this gen*_media.h libraries is needed.
> > 
> > It seems that further unification of *_fillfunc functions will
> > introduce more confusion in understanding what the tests are doing
> > and what were changes between Gens.
> > 
> > v2: Moved some reduntant changes from Move gpgpu/media fill to
> > gpu_fill...
> > to this patch. Applied comments from review.
> > 
> > v3: rebase
> > 
> > Signed-off-by: Katarzyna Dec <katarzyna.dec@intel.com>
> > Cc: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> There are some registers or their values, which are still duplicated
> under different name. I assume that after this series will be merged,
> You will work on refactoring header files. If not, then please fix
> entries mentioned below.
> 
> Reviewed-by: Lukasz Kalamarz <lukasz.kalamarz@intel.com> 
> > ---
> > 
> GEN7_SURFACE_2D == GEN8_SURFACE_2D
> GEN7_SURFACEFORMAT_R8_UNORM == GEN8_SURFACEFORMAT_R8_UNORM
> GEN7_FLOATING_POINT_IEEE_754 == GEN8_FLOATING_POINT_IEEE_754
> GEN7_STATE_BASE_ADDRESS == GEN8_STATE_BASE_ADDRESS
> GEN7_PIPELINE_SELECT == GEN8_PIPELINE_SELECT
>
Hi Lukasz,
I know that these registers are still duplicated. The ones I touched
allowed me to remove duplicated functions such as:
gen7_emit_vfe_state_gpgpu(batch);
gen7_emit_curbe_load(batch, curbe_buffer);
gen7_emit_interface_descriptor_load(batch, interface_descriptor);
The examples you've mentioned are also connected with the media_spin test.
Further refactoring of genX_media.h and the media_spin lib is planned
once this patch and the patch with batch_* are accepted.

Thanks for the review :)
Kasia
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [igt-dev] [PATCH i-g-t v5 4/4] lib: Adjust refactored gpu_fill library to our coding style
  2018-04-10 11:34 ` [igt-dev] [PATCH i-g-t v5 4/4] lib: Adjust refactored gpu_fill library to our coding style Katarzyna Dec
@ 2018-04-10 13:42   ` Katarzyna Dec
  0 siblings, 0 replies; 28+ messages in thread
From: Katarzyna Dec @ 2018-04-10 13:42 UTC (permalink / raw)
  To: igt-dev

On Tue, Apr 10, 2018 at 01:34:45PM +0200, Katarzyna Dec wrote:
> While I am making changes in gpgpu and media fill area let's
> adjust code to our coding style.
> 
> v2: rebased on series new version (patch is now last from
> series so change seems larger)
> v3: rebased
> 
> Signed-off-by: Katarzyna Dec <katarzyna.dec@intel.com>
> Cc: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> ---
>  lib/gpgpu_fill.c      |  24 ++++-----
>  lib/gpgpu_fill.h      |  12 ++---
>  lib/gpu_fill.c        | 142 +++++++++++++++++++++++---------------------------
>  lib/media_fill.h      |  20 +++----
>  lib/media_fill_gen7.c |   7 ++-
>  lib/media_fill_gen8.c |   7 +--
>  lib/media_fill_gen9.c |   7 +--
>  7 files changed, 105 insertions(+), 114 deletions(-)
> 
> diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
> index 5a77ebd4..72a1445a 100644
> --- a/lib/gpgpu_fill.c
> +++ b/lib/gpgpu_fill.c
> @@ -99,8 +99,8 @@ static const uint32_t gen9_gpgpu_kernel[][4] = {
>  void
>  gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
>  		    struct igt_buf *dst,
> -		    unsigned x, unsigned y,
> -		    unsigned width, unsigned height,
> +		    unsigned int x, unsigned int y,
> +		    unsigned int width, unsigned int height,
>  		    uint8_t color)
>  {
>  	uint32_t curbe_buffer, interface_descriptor;
> @@ -120,8 +120,8 @@ gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
>  	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
>  
>  	interface_descriptor = gen7_fill_interface_descriptor(batch, dst,
> -							      gen7_gpgpu_kernel,
> -							      sizeof(gen7_gpgpu_kernel));
> +				gen7_gpgpu_kernel, sizeof(gen7_gpgpu_kernel));
> +
>  	igt_assert(batch->ptr < &batch->buffer[4095]);
>  
>  	batch->ptr = batch->buffer;
> @@ -147,8 +147,8 @@ gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
>  void
>  gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
>  		    struct igt_buf *dst,
> -		    unsigned x, unsigned y,
> -		    unsigned width, unsigned height,
> +		    unsigned int x, unsigned int y,
> +		    unsigned int width, unsigned int height,
>  		    uint8_t color)
>  {
>  	uint32_t curbe_buffer, interface_descriptor;
> @@ -168,8 +168,8 @@ gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
>  	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
>  
>  	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
> -							      gen8_gpgpu_kernel,
> -							      sizeof(gen8_gpgpu_kernel));
> +				gen8_gpgpu_kernel, sizeof(gen8_gpgpu_kernel));
> +
>  	igt_assert(batch->ptr < &batch->buffer[4095]);
>  
>  	batch->ptr = batch->buffer;
> @@ -195,8 +195,8 @@ gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
>  void
>  gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
>  		    struct igt_buf *dst,
> -		    unsigned x, unsigned y,
> -		    unsigned width, unsigned height,
> +		    unsigned int x, unsigned int y,
> +		    unsigned int width, unsigned int height,
>  		    uint8_t color)
>  {
>  	uint32_t curbe_buffer, interface_descriptor;
> @@ -216,8 +216,8 @@ gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
>  	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
>  
>  	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
> -							      gen9_gpgpu_kernel,
> -							      sizeof(gen9_gpgpu_kernel));
> +				gen9_gpgpu_kernel, sizeof(gen9_gpgpu_kernel));
> +
>  	igt_assert(batch->ptr < &batch->buffer[4095]);
>  
>  	batch->ptr = batch->buffer;
> diff --git a/lib/gpgpu_fill.h b/lib/gpgpu_fill.h
> index 7b5c8322..f0d188ae 100644
> --- a/lib/gpgpu_fill.h
> +++ b/lib/gpgpu_fill.h
> @@ -30,22 +30,22 @@
>  void
>  gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
>  		    struct igt_buf *dst,
> -		    unsigned x, unsigned y,
> -		    unsigned width, unsigned height,
> +		    unsigned int x, unsigned int y,
> +		    unsigned int width, unsigned int height,
>  		    uint8_t color);
>  
>  void
>  gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
>  		    struct igt_buf *dst,
> -		    unsigned x, unsigned y,
> -		    unsigned width, unsigned height,
> +		    unsigned int x, unsigned int y,
> +		    unsigned int width, unsigned int height,
>  		    uint8_t color);
>  
>  void
>  gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
>  		    struct igt_buf *dst,
> -		    unsigned x, unsigned y,
> -		    unsigned width, unsigned height,
> +		    unsigned int x, unsigned int y,
> +		    unsigned int width, unsigned int height,
>  		    uint8_t color);
>  
>  #endif /* GPGPU_FILL_H */
> diff --git a/lib/gpu_fill.c b/lib/gpu_fill.c
> index 102f141b..fc28a945 100644
> --- a/lib/gpu_fill.c
> +++ b/lib/gpu_fill.c
> @@ -10,6 +10,7 @@ uint32_t
>  batch_align(struct intel_batchbuffer *batch, uint32_t align)
>  {
>  	uint32_t offset = batch_used(batch);
> +
>  	offset = ALIGN(offset, align);
>  	batch->ptr = batch->buffer + offset;
>  	return offset;
> @@ -19,6 +20,7 @@ void *
>  batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
>  {
>  	uint32_t offset = batch_align(batch, align);
> +
>  	batch->ptr += size;
>  	return memset(batch->buffer + offset, 0, size);
>  }
> @@ -30,9 +32,11 @@ batch_offset(struct intel_batchbuffer *batch, void *ptr)
>  }
>  
>  uint32_t
> -batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
> +batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size,
> +	   uint32_t align)
>  {
> -	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
> +	return batch_offset(batch, memcpy(batch_alloc(batch, size, align),
> +			    ptr, size));
>  }
>  
>  void
> @@ -43,13 +47,13 @@ gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
>  	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
>  	if (ret == 0)
>  		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
> -					NULL, 0, 0, 0);
> +					    NULL, 0, 0, 0);
>  	igt_assert(ret == 0);
>  }
>  
>  uint32_t
>  gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
> -			uint8_t color)
> +			    uint8_t color)
>  {
>  	uint8_t *curbe_buffer;
>  	uint32_t offset;
> @@ -62,10 +66,8 @@ gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
>  }
>  
>  uint32_t
> -gen7_fill_surface_state(struct intel_batchbuffer *batch,
> -			struct igt_buf *buf,
> -			uint32_t format,
> -			int is_dst)
> +gen7_fill_surface_state(struct intel_batchbuffer *batch, struct igt_buf *buf,
> +			uint32_t format, int is_dst)
>  {
>  	struct gen7_surface_state *ss;
>  	uint32_t write_domain, read_domain, offset;
> @@ -111,8 +113,7 @@ gen7_fill_surface_state(struct intel_batchbuffer *batch,
>  }
>  
>  uint32_t
> -gen7_fill_binding_table(struct intel_batchbuffer *batch,
> -			struct igt_buf *dst)
> +gen7_fill_binding_table(struct intel_batchbuffer *batch, struct igt_buf *dst)
>  {
>  	uint32_t *binding_table, offset;
>  
> @@ -129,9 +130,8 @@ gen7_fill_binding_table(struct intel_batchbuffer *batch,
>  }
>  
>  uint32_t
> -gen7_fill_kernel(struct intel_batchbuffer *batch,
> -		const uint32_t kernel[][4],
> -		size_t size)
> +gen7_fill_kernel(struct intel_batchbuffer *batch, const uint32_t kernel[][4],
> +		 size_t size)
>  {
>  	uint32_t offset;
>  
> @@ -141,8 +141,9 @@ gen7_fill_kernel(struct intel_batchbuffer *batch,
>  }
>  
>  uint32_t
> -gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,
> -			       const uint32_t kernel[][4], size_t size)
> +gen7_fill_interface_descriptor(struct intel_batchbuffer *batch,
> +			       struct igt_buf *dst, const uint32_t kernel[][4],
> +			       size_t size)
>  {
>  	struct gen7_interface_descriptor_data *idd;
>  	uint32_t offset;
> @@ -180,16 +181,19 @@ gen7_emit_state_base_address(struct intel_batchbuffer *batch)
>  	OUT_BATCH(0);
>  
>  	/* surface */
> -	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
> +	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
> +		  BASE_ADDRESS_MODIFY);
>  
>  	/* dynamic */
> -	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
> +	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
> +		  BASE_ADDRESS_MODIFY);
>  
>  	/* indirect */
>  	OUT_BATCH(0);
>  
>  	/* instruction */
> -	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
> +	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
> +		  BASE_ADDRESS_MODIFY);
>  
>  	/* general/dynamic/indirect/instruction access Bound */
>  	OUT_BATCH(0);
> @@ -214,7 +218,7 @@ gen7_emit_vfe_state(struct intel_batchbuffer *batch)
>  
>  	/* urb entry size & curbe size */
>  	OUT_BATCH(2 << 16 | 	/* in 256 bits unit */
> -		2);		/* in 256 bits unit */
> +		  2);		/* in 256 bits unit */
>  
>  	/* scoreboard */
>  	OUT_BATCH(0);
> @@ -268,14 +272,16 @@ gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t in
>  		OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
>  	else
>  		OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
> -	/* interface descriptor address, is relative to the dynamics base address */
> +	/* interface descriptor address, is relative to the dynamics base
> +	 * address
> +	 */
>  	OUT_BATCH(interface_descriptor);
>  }
>  
>  void
>  gen7_emit_media_objects(struct intel_batchbuffer *batch,
> -			unsigned x, unsigned y,
> -			unsigned width, unsigned height)
> +			unsigned int x, unsigned int y,
> +			unsigned int width, unsigned int height)
>  {
>  	int i, j;
>  
> @@ -297,7 +303,8 @@ gen7_emit_media_objects(struct intel_batchbuffer *batch,
>  			/* inline data (xoffset, yoffset) */
>  			OUT_BATCH(x + i * 16);
>  			OUT_BATCH(y + j * 16);
> -			if (AT_LEAST_GEN(batch->devid, 8) && !IS_CHERRYVIEW(batch->devid))
> +			if (AT_LEAST_GEN(batch->devid, 8) &&
> +			    !IS_CHERRYVIEW(batch->devid))
>  				gen8_emit_media_state_flush(batch);
>  		}
>  	}
> @@ -305,8 +312,8 @@ gen7_emit_media_objects(struct intel_batchbuffer *batch,
>  
>  void
>  gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
> -		     unsigned x, unsigned y,
> -		     unsigned width, unsigned height)
> +		     unsigned int x, unsigned int y,
> +		     unsigned int width, unsigned int height)
>  {
>  	uint32_t x_dim, y_dim, tmp, right_mask;
>  
> @@ -459,8 +466,8 @@ gen8_emit_state_base_address(struct intel_batchbuffer *batch)
>  	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
>  
>  	/* dynamic */
> -	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
> -		0, BASE_ADDRESS_MODIFY);
> +	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER |
> +		  I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
>  
>  	/* indirect */
>  	OUT_BATCH(0);
> @@ -475,7 +482,9 @@ gen8_emit_state_base_address(struct intel_batchbuffer *batch)
>  	OUT_BATCH(1 << 12 | 1);
>  	/* indirect object buffer size */
>  	OUT_BATCH(0xfffff000 | 1);
> -	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
> +	/* instruction buffer size, must set modify enable bit, otherwise it
> +	 * may result in GPU hang
> +	 */
>  	OUT_BATCH(1 << 12 | 1);
>  }
>  
> @@ -536,62 +545,41 @@ gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
>  
>  void
>  gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
> -		     unsigned x, unsigned y,
> -		     unsigned width, unsigned height)
> +		     unsigned int x, unsigned int y,
> +		     unsigned int width, unsigned int height)
>  {
> -	uint32_t x_dim, y_dim, tmp, right_mask;
> -
It looks like something went wrong with the rebase in this function (after the
changes in patch 1); nothing should be deleted from this function. For now
gpgpu_fill is not working because of this. A fix is in progress.
> -	/*
> -	 * Simply do SIMD16 based dispatch, so every thread uses
> -	 * SIMD16 channels.
> -	 *
> -	 * Define our own thread group size, e.g 16x1 for every group, then
> -	 * will have 1 thread each group in SIMD16 dispatch. So thread
> -	 * width/height/depth are all 1.
> -	 *
> -	 * Then thread group X = width / 16 (aligned to 16)
> -	 * thread group Y = height;
> -	 */
> -	x_dim = (width + 15) / 16;
> -	y_dim = height;
> -
> -	tmp = width & 15;
> -	if (tmp == 0)
> -		right_mask = (1 << 16) - 1;
> -	else
> -		right_mask = (1 << tmp) - 1;
> -
> -	OUT_BATCH(GEN7_GPGPU_WALKER | 13);
> +	/* general */
> +	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
> +	OUT_BATCH(0);
>  
> -	OUT_BATCH(0); /* kernel offset */
> -	OUT_BATCH(0); /* indirect data length */
> -	OUT_BATCH(0); /* indirect data offset */
> +	/* stateless data port */
> +	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>  
> -	/* SIMD size, thread w/h/d */
> -	OUT_BATCH(1 << 30 | /* SIMD16 */
> -		  0 << 16 | /* depth:1 */
> -		  0 << 8 | /* height:1 */
> -		  0); /* width:1 */
> +	/* surface */
> +	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
>  
> -	/* thread group X */
> -	OUT_BATCH(0);
> -	OUT_BATCH(0);
> -	OUT_BATCH(x_dim);
> +	/* dynamic */
> +	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER |
> +		  I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
>  
> -	/* thread group Y */
> -	OUT_BATCH(0);
> +	/* indirect */
>  	OUT_BATCH(0);
> -	OUT_BATCH(y_dim);
> -
> -	/* thread group Z */
>  	OUT_BATCH(0);
> -	OUT_BATCH(1);
>  
> -	/* right mask */
> -	OUT_BATCH(right_mask);
> +	/* instruction */
> +	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
> +		  BASE_ADDRESS_MODIFY);
>  
> -	/* bottom mask, height 1, always 0xffffffff */
> -	OUT_BATCH(0xffffffff);
> +	/* general state buffer size */
> +	OUT_BATCH(0xfffff000 | 1);
> +	/* dynamic state buffer size */
> +	OUT_BATCH(1 << 12 | 1);
> +	/* indirect object buffer size */
> +	OUT_BATCH(0xfffff000 | 1);
> +	/* instruction buffer size, must set modify enable bit, otherwise it
> +	 * may result in GPU hang
> +	 */
> +	OUT_BATCH(1 << 12 | 1);
>  }
>  
>  void
> @@ -626,7 +614,9 @@ gen9_emit_state_base_address(struct intel_batchbuffer *batch)
>  	OUT_BATCH(1 << 12 | 1);
>  	/* indirect object buffer size */
>  	OUT_BATCH(0xfffff000 | 1);
> -	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
> +	/* intruction buffer size, must set modify enable bit, otherwise it may
> +	 * result in GPU hang
> +	 */
>  	OUT_BATCH(1 << 12 | 1);
>  
>  	/* Bindless surface state base address */
> diff --git a/lib/media_fill.h b/lib/media_fill.h
> index 161af8cf..f6db734e 100644
> --- a/lib/media_fill.h
> +++ b/lib/media_fill.h
> @@ -7,22 +7,22 @@
>  void
>  gen8_media_fillfunc(struct intel_batchbuffer *batch,
>  		struct igt_buf *dst,
> -		unsigned x, unsigned y,
> -		unsigned width, unsigned height,
> +		unsigned int x, unsigned int y,
> +		unsigned int width, unsigned int height,
>  		uint8_t color);
>  
>  void
>  gen7_media_fillfunc(struct intel_batchbuffer *batch,
> -                struct igt_buf *dst,
> -                unsigned x, unsigned y,
> -                unsigned width, unsigned height,
> -                uint8_t color);
> +		struct igt_buf *dst,
> +		unsigned int x, unsigned int y,
> +		unsigned int width, unsigned int height,
> +		uint8_t color);
>  
>  void
>  gen9_media_fillfunc(struct intel_batchbuffer *batch,
> -                struct igt_buf *dst,
> -                unsigned x, unsigned y,
> -                unsigned width, unsigned height,
> -                uint8_t color);
> +		struct igt_buf *dst,
> +		unsigned int x, unsigned int y,
> +		unsigned int width, unsigned int height,
> +		uint8_t color);
>  
>  #endif /* RENDE_MEDIA_FILL_H */
> diff --git a/lib/media_fill_gen7.c b/lib/media_fill_gen7.c
> index c97555a6..5a8c32fb 100644
> --- a/lib/media_fill_gen7.c
> +++ b/lib/media_fill_gen7.c
> @@ -47,8 +47,8 @@ static const uint32_t media_kernel[][4] = {
>  void
>  gen7_media_fillfunc(struct intel_batchbuffer *batch,
>  		struct igt_buf *dst,
> -		unsigned x, unsigned y,
> -		unsigned width, unsigned height,
> +		unsigned int x, unsigned int y,
> +		unsigned int width, unsigned int height,
>  		uint8_t color)
>  {
>  	uint32_t curbe_buffer, interface_descriptor;
> @@ -61,8 +61,7 @@ gen7_media_fillfunc(struct intel_batchbuffer *batch,
>  
>  	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
>  	interface_descriptor = gen7_fill_interface_descriptor(batch, dst,
> -							      media_kernel,
> -							      sizeof(media_kernel));
> +					media_kernel, sizeof(media_kernel));
>  	igt_assert(batch->ptr < &batch->buffer[4095]);
>  
>  	/* media pipeline */
> diff --git a/lib/media_fill_gen8.c b/lib/media_fill_gen8.c
> index 362abd61..d6dd7410 100644
> --- a/lib/media_fill_gen8.c
> +++ b/lib/media_fill_gen8.c
> @@ -50,8 +50,8 @@ static const uint32_t media_kernel[][4] = {
>  void
>  gen8_media_fillfunc(struct intel_batchbuffer *batch,
>  		struct igt_buf *dst,
> -		unsigned x, unsigned y,
> -		unsigned width, unsigned height,
> +		unsigned int x, unsigned int y,
> +		unsigned int width, unsigned int height,
>  		uint8_t color)
>  {
>  	uint32_t curbe_buffer, interface_descriptor;
> @@ -63,7 +63,8 @@ gen8_media_fillfunc(struct intel_batchbuffer *batch,
>  	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
>  
>  	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
> -	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
> +	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
> +					media_kernel, sizeof(media_kernel));
>  	igt_assert(batch->ptr < &batch->buffer[4095]);
>  
>  	/* media pipeline */
> diff --git a/lib/media_fill_gen9.c b/lib/media_fill_gen9.c
> index d1335fe6..a9a829f2 100644
> --- a/lib/media_fill_gen9.c
> +++ b/lib/media_fill_gen9.c
> @@ -47,8 +47,8 @@ static const uint32_t media_kernel[][4] = {
>  void
>  gen9_media_fillfunc(struct intel_batchbuffer *batch,
>  		struct igt_buf *dst,
> -		unsigned x, unsigned y,
> -		unsigned width, unsigned height,
> +		unsigned int x, unsigned int y,
> +		unsigned int width, unsigned int height,
>  		uint8_t color)
>  {
>  	uint32_t curbe_buffer, interface_descriptor;
> @@ -60,7 +60,8 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
>  	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
>  
>  	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
> -	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
> +	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
> +					media_kernel, sizeof(media_kernel));
>  	assert(batch->ptr < &batch->buffer[4095]);
>  
>  	/* media pipeline */
> -- 
> 2.14.3
> 
> _______________________________________________
> igt-dev mailing list
> igt-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/igt-dev
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for lib/gen6_render: Refactoring lib (rev4)
  2018-04-09 15:42 [igt-dev] [PATCH i-g-t] lib/gen6_render: Refactoring lib Lukasz Kalamarz
                   ` (13 preceding siblings ...)
  2018-04-10 11:34 ` [igt-dev] [PATCH i-g-t v5 4/4] lib: Adjust refactored gpu_fill library to our coding style Katarzyna Dec
@ 2018-04-10 13:46 ` Patchwork
  2018-04-10 15:04 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
  15 siblings, 0 replies; 28+ messages in thread
From: Patchwork @ 2018-04-10 13:46 UTC (permalink / raw)
  To: Katarzyna Dec; +Cc: igt-dev

== Series Details ==

Series: lib/gen6_render: Refactoring lib (rev4)
URL   : https://patchwork.freedesktop.org/series/41379/
State : success

== Summary ==

IGT patchset tested on top of latest successful build
7c474e011548d35df6b80ceed81d3e6ca560c71d tests/perf: fix gen8 small cores whitelist expectation

with latest DRM-Tip kernel build CI_DRM_4040
8e7a3b1c5ebd drm-tip: 2018y-04m-10d-10h-47m-52s UTC integration manifest

No testlist changes.

---- Possible new issues:

Test gem_exec_gttfill:
        Subgroup basic:
                skip       -> PASS       (fi-pnv-d510)

---- Known issues:

Test debugfs_test:
        Subgroup read_all_entries:
                incomplete -> PASS       (fi-snb-2520m) fdo#103713
Test gem_mmap_gtt:
        Subgroup basic-small-bo-tiledx:
                pass       -> FAIL       (fi-gdg-551) fdo#102575
Test prime_vgem:
        Subgroup basic-fence-flip:
                pass       -> FAIL       (fi-ilk-650) fdo#104008

fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
fdo#104008 https://bugs.freedesktop.org/show_bug.cgi?id=104008

fi-bdw-5557u     total:285  pass:264  dwarn:0   dfail:0   fail:0   skip:21  time:431s
fi-bdw-gvtdvm    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:448s
fi-blb-e6850     total:285  pass:220  dwarn:1   dfail:0   fail:0   skip:64  time:392s
fi-bsw-n3050     total:285  pass:239  dwarn:0   dfail:0   fail:0   skip:46  time:548s
fi-bwr-2160      total:285  pass:180  dwarn:0   dfail:0   fail:0   skip:105 time:297s
fi-bxt-dsi       total:285  pass:255  dwarn:0   dfail:0   fail:0   skip:30  time:515s
fi-bxt-j4205     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:515s
fi-byt-j1900     total:285  pass:250  dwarn:0   dfail:0   fail:0   skip:35  time:523s
fi-byt-n2820     total:285  pass:246  dwarn:0   dfail:0   fail:0   skip:39  time:512s
fi-cfl-8700k     total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:411s
fi-cfl-s3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:563s
fi-cfl-u         total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:510s
fi-cnl-y3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:590s
fi-elk-e7500     total:285  pass:226  dwarn:0   dfail:0   fail:0   skip:59  time:428s
fi-gdg-551       total:285  pass:176  dwarn:0   dfail:0   fail:1   skip:108 time:316s
fi-glk-1         total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:540s
fi-glk-j4005     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:489s
fi-hsw-4770      total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:404s
fi-ilk-650       total:285  pass:224  dwarn:0   dfail:0   fail:1   skip:60  time:425s
fi-ivb-3520m     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:468s
fi-ivb-3770      total:285  pass:252  dwarn:0   dfail:0   fail:0   skip:33  time:435s
fi-kbl-7500u     total:285  pass:260  dwarn:1   dfail:0   fail:0   skip:24  time:471s
fi-kbl-7567u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:464s
fi-kbl-r         total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:509s
fi-pnv-d510      total:285  pass:220  dwarn:1   dfail:0   fail:0   skip:64  time:643s
fi-skl-6260u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:441s
fi-skl-6600u     total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:533s
fi-skl-6700k2    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:506s
fi-skl-6770hq    total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:501s
fi-skl-guc       total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:433s
fi-skl-gvtdvm    total:285  pass:262  dwarn:0   dfail:0   fail:0   skip:23  time:446s
fi-snb-2520m     total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:590s
fi-snb-2600      total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:401s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1240/issues.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [igt-dev] [PATCH i-g-t v6 1/4] lib: Move common gpgpu/media fill functions to gpu_fill library
  2018-04-10 11:28 ` [igt-dev] [PATCH i-g-t v2] lib/gen6_render: Refactoring lib Lukasz Kalamarz
  2018-04-10 13:28   ` Katarzyna Dec
@ 2018-04-10 14:19   ` Katarzyna Dec
  2018-04-10 16:01     ` Antonio Argenziano
  2018-04-10 14:19   ` [igt-dev] [PATCH i-g-t v6 2/4] lib: Remove duplications in " Katarzyna Dec
                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 28+ messages in thread
From: Katarzyna Dec @ 2018-04-10 14:19 UTC (permalink / raw)
  To: igt-dev

The gpgpu_fill and media_fill libraries are very similar and many
functions can be shared. I have created a gpu_fill library with
all the functions needed to implement the gpgpu_fill and media_fill
tests for all Gens. To ease review and debugging, this patch
should only move functions from several libraries into one, removing
functions that are identical for media and gpgpu.
Places in the code that required more changes:
  Removed the gen7_fill_gpgpu_kernel function, which is identical to
gen7_fill_media_kernel and would conflict with moving the
genX_fill_interface_descriptor functions, which are the same for media and gpgpu.
  The gen8_fill_media_kernel function is not removed in this patch
(although it is identical to the gen7 version), because this patch
should consist of function movement as much as possible.
  gen8_fill_interface_descriptor was unified for media and gpgpu
by adding the kernel and its size as parameters (these parameters
were missing in the media gen8, gen8lp and gen9 functions).
  gen8_emit_state_base_address was unified; the gpgpu version was
configured as if it were using indirect state (while we are
using CURBE). I have checked that the media fill version
(OUT_BATCH(0 | BASE_ADDRESS_MODIFY)) works fine on gpgpu gen8 and newer.

v2: Changed code layout. GenX_fill_media_kernel was identical to
genX_fill_gpgpu_kernel so this function was unified to
gen7_fill_kernel. There were 2 very similar functions
gen8_emit_state_base_address for media and gpgpu, where the one
for gpgpu was configured like it would be using indirect state
(while we are using CURBE). I have checked if media fill version
works fine in gpgpu test on Gen8 and unified them.

v3: Made the patch easier to review by moving the changes that unify code
across gens (which were included in v1) to another patch, leaving only
the most critical code changes.

v5: Added copyrights and #ifndef to gpu_fill.h

Signed-off-by: Katarzyna Dec <katarzyna.dec@intel.com>
Cc: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 lib/Makefile.sources    |   2 +
 lib/gpgpu_fill.c        | 571 +-----------------------------------
 lib/gpu_fill.c          | 758 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/gpu_fill.h          | 166 +++++++++++
 lib/media_fill_gen7.c   | 271 +----------------
 lib/media_fill_gen8.c   | 290 +-----------------
 lib/media_fill_gen8lp.c | 284 +-----------------
 lib/media_fill_gen9.c   | 298 +------------------
 lib/meson.build         |   1 +
 9 files changed, 936 insertions(+), 1705 deletions(-)
 create mode 100644 lib/gpu_fill.c
 create mode 100644 lib/gpu_fill.h

diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index 3d37ef1d..45e65dd7 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -64,6 +64,8 @@ lib_source_list =	 	\
 	media_spin.c		\
 	gpgpu_fill.h		\
 	gpgpu_fill.c		\
+	gpu_fill.h		\
+	gpu_fill.c		\
 	gen7_media.h            \
 	gen8_media.h            \
 	rendercopy_i915.c	\
diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
index 4d98643d..f2765fd6 100644
--- a/lib/gpgpu_fill.c
+++ b/lib/gpgpu_fill.c
@@ -30,10 +30,9 @@
 
 #include "intel_reg.h"
 #include "drmtest.h"
-#include "intel_batchbuffer.h"
-#include "gen7_media.h"
-#include "gen8_media.h"
+
 #include "gpgpu_fill.h"
+#include "gpu_fill.h"
 
 /* shaders/gpgpu/gpgpu_fill.gxa */
 static const uint32_t gen7_gpgpu_kernel[][4] = {
@@ -75,572 +74,6 @@ static const uint32_t gen9_gpgpu_kernel[][4] = {
 	{ 0x07800031, 0x20000a40, 0x06000e00, 0x82000010 },
 };
 
-static uint32_t
-batch_used(struct intel_batchbuffer *batch)
-{
-	return batch->ptr - batch->buffer;
-}
-
-static uint32_t
-batch_align(struct intel_batchbuffer *batch, uint32_t align)
-{
-	uint32_t offset = batch_used(batch);
-	offset = ALIGN(offset, align);
-	batch->ptr = batch->buffer + offset;
-	return offset;
-}
-
-static void *
-batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
-{
-	uint32_t offset = batch_align(batch, align);
-	batch->ptr += size;
-	return memset(batch->buffer + offset, 0, size);
-}
-
-static uint32_t
-batch_offset(struct intel_batchbuffer *batch, void *ptr)
-{
-	return (uint8_t *)ptr - batch->buffer;
-}
-
-static uint32_t
-batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size,
-	   uint32_t align)
-{
-	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
-}
-
-static void
-gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
-{
-	int ret;
-
-	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
-	if (ret == 0)
-		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
-					NULL, 0, 0, 0);
-	igt_assert(ret == 0);
-}
-
-static uint32_t
-gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch, uint8_t color)
-{
-	uint8_t *curbe_buffer;
-	uint32_t offset;
-
-	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
-	offset = batch_offset(batch, curbe_buffer);
-	*curbe_buffer = color;
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_surface_state(struct intel_batchbuffer *batch,
-			struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
-{
-	struct gen7_surface_state *ss;
-	uint32_t write_domain, read_domain, offset;
-	int ret;
-
-	if (is_dst) {
-		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
-	} else {
-		write_domain = 0;
-		read_domain = I915_GEM_DOMAIN_SAMPLER;
-	}
-
-	ss = batch_alloc(batch, sizeof(*ss), 64);
-	offset = batch_offset(batch, ss);
-
-	ss->ss0.surface_type = GEN7_SURFACE_2D;
-	ss->ss0.surface_format = format;
-	ss->ss0.render_cache_read_write = 1;
-
-	if (buf->tiling == I915_TILING_X)
-		ss->ss0.tiled_mode = 2;
-	else if (buf->tiling == I915_TILING_Y)
-		ss->ss0.tiled_mode = 3;
-
-	ss->ss1.base_addr = buf->bo->offset;
-	ret = drm_intel_bo_emit_reloc(batch->bo,
-				batch_offset(batch, ss) + 4,
-				buf->bo, 0,
-				read_domain, write_domain);
-	igt_assert(ret == 0);
-
-	ss->ss2.height = igt_buf_height(buf) - 1;
-	ss->ss2.width  = igt_buf_width(buf) - 1;
-
-	ss->ss3.pitch  = buf->stride - 1;
-
-	ss->ss7.shader_chanel_select_r = 4;
-	ss->ss7.shader_chanel_select_g = 5;
-	ss->ss7.shader_chanel_select_b = 6;
-	ss->ss7.shader_chanel_select_a = 7;
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_surface_state(struct intel_batchbuffer *batch,
-			struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
-{
-	struct gen8_surface_state *ss;
-	uint32_t write_domain, read_domain, offset;
-	int ret;
-
-	if (is_dst) {
-		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
-	} else {
-		write_domain = 0;
-		read_domain = I915_GEM_DOMAIN_SAMPLER;
-	}
-
-	ss = batch_alloc(batch, sizeof(*ss), 64);
-	offset = batch_offset(batch, ss);
-
-	ss->ss0.surface_type = GEN8_SURFACE_2D;
-	ss->ss0.surface_format = format;
-	ss->ss0.render_cache_read_write = 1;
-	ss->ss0.vertical_alignment = 1; /* align 4 */
-	ss->ss0.horizontal_alignment = 1; /* align 4 */
-
-	if (buf->tiling == I915_TILING_X)
-		ss->ss0.tiled_mode = 2;
-	else if (buf->tiling == I915_TILING_Y)
-		ss->ss0.tiled_mode = 3;
-
-	ss->ss8.base_addr = buf->bo->offset;
-
-	ret = drm_intel_bo_emit_reloc(batch->bo,
-				batch_offset(batch, ss) + 8 * 4,
-				buf->bo, 0,
-				read_domain, write_domain);
-	igt_assert_eq(ret, 0);
-
-	ss->ss2.height = igt_buf_height(buf) - 1;
-	ss->ss2.width  = igt_buf_width(buf) - 1;
-	ss->ss3.pitch  = buf->stride - 1;
-
-	ss->ss7.shader_chanel_select_r = 4;
-	ss->ss7.shader_chanel_select_g = 5;
-	ss->ss7.shader_chanel_select_b = 6;
-	ss->ss7.shader_chanel_select_a = 7;
-
-	return offset;
-
-}
-
-static uint32_t
-gen7_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = batch_alloc(batch, 32, 64);
-	offset = batch_offset(batch, binding_table);
-
-	binding_table[0] = gen7_fill_surface_state(batch, dst, GEN7_SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = batch_alloc(batch, 32, 64);
-	offset = batch_offset(batch, binding_table);
-
-	binding_table[0] = gen8_fill_surface_state(batch, dst, GEN8_SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_gpgpu_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size)
-{
-	uint32_t offset;
-
-	offset = batch_copy(batch, kernel, size, 64);
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,
-			       const uint32_t kernel[][4], size_t size)
-{
-	struct gen7_interface_descriptor_data *idd;
-	uint32_t offset;
-	uint32_t binding_table_offset, kernel_offset;
-
-	binding_table_offset = gen7_fill_binding_table(batch, dst);
-	kernel_offset = gen7_fill_gpgpu_kernel(batch, kernel, size);
-
-	idd = batch_alloc(batch, sizeof(*idd), 64);
-	offset = batch_offset(batch, idd);
-
-	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
-
-	idd->desc1.single_program_flow = 1;
-	idd->desc1.floating_point_mode = GEN7_FLOATING_POINT_IEEE_754;
-
-	idd->desc2.sampler_count = 0;      /* 0 samplers used */
-	idd->desc2.sampler_state_pointer = 0;
-
-	idd->desc3.binding_table_entry_count = 0;
-	idd->desc3.binding_table_pointer = (binding_table_offset >> 5);
-
-	idd->desc4.constant_urb_entry_read_offset = 0;
-	idd->desc4.constant_urb_entry_read_length = 1; /* grf 1 */
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,
-			       const uint32_t kernel[][4], size_t size)
-{
-	struct gen8_interface_descriptor_data *idd;
-	uint32_t offset;
-	uint32_t binding_table_offset, kernel_offset;
-
-	binding_table_offset = gen8_fill_binding_table(batch, dst);
-	kernel_offset = gen7_fill_gpgpu_kernel(batch, kernel, size);
-
-	idd = batch_alloc(batch, sizeof(*idd), 64);
-	offset = batch_offset(batch, idd);
-
-	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
-
-	idd->desc2.single_program_flow = 1;
-	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
-
-	idd->desc3.sampler_count = 0;      /* 0 samplers used */
-	idd->desc3.sampler_state_pointer = 0;
-
-	idd->desc4.binding_table_entry_count = 0;
-	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
-
-	idd->desc5.constant_urb_entry_read_offset = 0;
-	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
-
-	return offset;
-}
-
-static void
-gen7_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
-
-	/* general */
-	OUT_BATCH(0);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* general/dynamic/indirect/instruction access Bound */
-	OUT_BATCH(0);
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-}
-
-static void
-gen8_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2));
-
-	/* general */
-	OUT_BATCH(0 | (0x78 << 4) | (0 << 1) |  BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-
-	/* stateless data port */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
-		  0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-	OUT_BATCH(0 );
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* general state buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* dynamic state buffer size */
-	OUT_BATCH(1 << 12 | 1);
-	/* indirect object buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
-	OUT_BATCH(1 << 12 | 1);
-}
-
-static void
-gen9_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (19 - 2));
-
-	/* general */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-
-	/* stateless data port */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
-		0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* general state buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* dynamic state buffer size */
-	OUT_BATCH(1 << 12 | 1);
-	/* indirect object buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
-	OUT_BATCH(1 << 12 | 1);
-
-	/* Bindless surface state base address */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-	OUT_BATCH(0xfffff000);
-}
-
-static void
-gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 | /* max num of threads */
-		  0 << 8 | /* num of URB entry */
-		  1 << 2); /* GPGPU mode */
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(0 << 16 | 	/* URB entry size in 256 bits unit */
-		  1);		/* CURBE entry size in 256 bits unit */
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-static void
-gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 | 1 << 8);
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(0 << 16 | 1);
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-static void
-gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
-{
-	OUT_BATCH(GEN7_MEDIA_CURBE_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* curbe total data length */
-	OUT_BATCH(64);
-	/* curbe data start address, is relative to the dynamics base address */
-	OUT_BATCH(curbe_buffer);
-}
-
-static void
-gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
-	OUT_BATCH(interface_descriptor);
-}
-
-static void
-gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
-	OUT_BATCH(interface_descriptor);
-}
-
-static void
-gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
-		     unsigned x, unsigned y,
-		     unsigned width, unsigned height)
-{
-	uint32_t x_dim, y_dim, tmp, right_mask;
-
-	/*
-	 * Simply do SIMD16 based dispatch, so every thread uses
-	 * SIMD16 channels.
-	 *
-	 * Define our own thread group size, e.g 16x1 for every group, then
-	 * will have 1 thread each group in SIMD16 dispatch. So thread
-	 * width/height/depth are all 1.
-	 *
-	 * Then thread group X = width / 16 (aligned to 16)
-	 * thread group Y = height;
-	 */
-	x_dim = (width + 15) / 16;
-	y_dim = height;
-
-	tmp = width & 15;
-	if (tmp == 0)
-		right_mask = (1 << 16) - 1;
-	else
-		right_mask = (1 << tmp) - 1;
-
-	OUT_BATCH(GEN7_GPGPU_WALKER | 9);
-
-	/* interface descriptor offset */
-	OUT_BATCH(0);
-
-	/* SIMD size, thread w/h/d */
-	OUT_BATCH(1 << 30 | /* SIMD16 */
-		  0 << 16 | /* depth:1 */
-		  0 << 8 | /* height:1 */
-		  0); /* width:1 */
-
-	/* thread group X */
-	OUT_BATCH(0);
-	OUT_BATCH(x_dim);
-
-	/* thread group Y */
-	OUT_BATCH(0);
-	OUT_BATCH(y_dim);
-
-	/* thread group Z */
-	OUT_BATCH(0);
-	OUT_BATCH(1);
-
-	/* right mask */
-	OUT_BATCH(right_mask);
-
-	/* bottom mask, height 1, always 0xffffffff */
-	OUT_BATCH(0xffffffff);
-}
-
-static void
-gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
-		     unsigned x, unsigned y,
-		     unsigned width, unsigned height)
-{
-	uint32_t x_dim, y_dim, tmp, right_mask;
-
-	/*
-	 * Simply do SIMD16 based dispatch, so every thread uses
-	 * SIMD16 channels.
-	 *
-	 * Define our own thread group size, e.g 16x1 for every group, then
-	 * will have 1 thread each group in SIMD16 dispatch. So thread
-	 * width/height/depth are all 1.
-	 *
-	 * Then thread group X = width / 16 (aligned to 16)
-	 * thread group Y = height;
-	 */
-	x_dim = (width + 15) / 16;
-	y_dim = height;
-
-	tmp = width & 15;
-	if (tmp == 0)
-		right_mask = (1 << 16) - 1;
-	else
-		right_mask = (1 << tmp) - 1;
-
-	OUT_BATCH(GEN7_GPGPU_WALKER | 13);
-
-	OUT_BATCH(0); /* kernel offset */
-	OUT_BATCH(0); /* indirect data length */
-	OUT_BATCH(0); /* indirect data offset */
-
-	/* SIMD size, thread w/h/d */
-	OUT_BATCH(1 << 30 | /* SIMD16 */
-		  0 << 16 | /* depth:1 */
-		  0 << 8 | /* height:1 */
-		  0); /* width:1 */
-
-	/* thread group X */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(x_dim);
-
-	/* thread group Y */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(y_dim);
-
-	/* thread group Z */
-	OUT_BATCH(0);
-	OUT_BATCH(1);
-
-	/* right mask */
-	OUT_BATCH(right_mask);
-
-	/* bottom mask, height 1, always 0xffffffff */
-	OUT_BATCH(0xffffffff);
-}
-
 /*
  * This sets up the gpgpu pipeline,
  *
diff --git a/lib/gpu_fill.c b/lib/gpu_fill.c
new file mode 100644
index 00000000..172c6db6
--- /dev/null
+++ b/lib/gpu_fill.c
@@ -0,0 +1,758 @@
+#include "gpu_fill.h"
+
+uint32_t
+batch_used(struct intel_batchbuffer *batch)
+{
+	return batch->ptr - batch->buffer;
+}
+
+uint32_t
+batch_align(struct intel_batchbuffer *batch, uint32_t align)
+{
+	uint32_t offset = batch_used(batch);
+	offset = ALIGN(offset, align);
+	batch->ptr = batch->buffer + offset;
+	return offset;
+}
+
+void *
+batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
+{
+	uint32_t offset = batch_align(batch, align);
+	batch->ptr += size;
+	return memset(batch->buffer + offset, 0, size);
+}
+
+uint32_t
+batch_offset(struct intel_batchbuffer *batch, void *ptr)
+{
+	return (uint8_t *)ptr - batch->buffer;
+}
+
+uint32_t
+batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
+{
+	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
+}
+
+void
+gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
+{
+	int ret;
+
+	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
+	if (ret == 0)
+		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
+					NULL, 0, 0, 0);
+	igt_assert(ret == 0);
+}
+
+uint32_t
+gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
+			uint8_t color)
+{
+	uint8_t *curbe_buffer;
+	uint32_t offset;
+
+	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
+	offset = batch_offset(batch, curbe_buffer);
+	*curbe_buffer = color;
+
+	return offset;
+}
+
+uint32_t
+gen7_fill_surface_state(struct intel_batchbuffer *batch,
+			struct igt_buf *buf,
+			uint32_t format,
+			int is_dst)
+{
+	struct gen7_surface_state *ss;
+	uint32_t write_domain, read_domain, offset;
+	int ret;
+
+	if (is_dst) {
+		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
+	} else {
+		write_domain = 0;
+		read_domain = I915_GEM_DOMAIN_SAMPLER;
+	}
+
+	ss = batch_alloc(batch, sizeof(*ss), 64);
+	offset = batch_offset(batch, ss);
+
+	ss->ss0.surface_type = GEN7_SURFACE_2D;
+	ss->ss0.surface_format = format;
+	ss->ss0.render_cache_read_write = 1;
+
+	if (buf->tiling == I915_TILING_X)
+		ss->ss0.tiled_mode = 2;
+	else if (buf->tiling == I915_TILING_Y)
+		ss->ss0.tiled_mode = 3;
+
+	ss->ss1.base_addr = buf->bo->offset;
+	ret = drm_intel_bo_emit_reloc(batch->bo,
+				batch_offset(batch, ss) + 4,
+				buf->bo, 0,
+				read_domain, write_domain);
+	igt_assert(ret == 0);
+
+	ss->ss2.height = igt_buf_height(buf) - 1;
+	ss->ss2.width  = igt_buf_width(buf) - 1;
+
+	ss->ss3.pitch  = buf->stride - 1;
+
+	ss->ss7.shader_chanel_select_r = 4;
+	ss->ss7.shader_chanel_select_g = 5;
+	ss->ss7.shader_chanel_select_b = 6;
+	ss->ss7.shader_chanel_select_a = 7;
+
+	return offset;
+}
+
+uint32_t
+gen7_fill_binding_table(struct intel_batchbuffer *batch,
+			struct igt_buf *dst)
+{
+	uint32_t *binding_table, offset;
+
+	binding_table = batch_alloc(batch, 32, 64);
+	offset = batch_offset(batch, binding_table);
+	binding_table[0] = gen7_fill_surface_state(batch, dst,
+						GEN7_SURFACEFORMAT_R8_UNORM, 1);
+
+	return offset;
+}
+
+uint32_t
+gen7_fill_media_kernel(struct intel_batchbuffer *batch,
+		const uint32_t kernel[][4],
+		size_t size)
+{
+	uint32_t offset;
+
+	offset = batch_copy(batch, kernel, size, 64);
+
+	return offset;
+}
+
+uint32_t
+gen8_fill_media_kernel(struct intel_batchbuffer *batch,
+		const uint32_t kernel[][4],
+		size_t size)
+{
+	uint32_t offset;
+
+	offset = batch_copy(batch, kernel, size, 64);
+
+	return offset;
+}
+
+uint32_t
+gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,
+			       const uint32_t kernel[][4], size_t size)
+{
+	struct gen7_interface_descriptor_data *idd;
+	uint32_t offset;
+	uint32_t binding_table_offset, kernel_offset;
+
+	binding_table_offset = gen7_fill_binding_table(batch, dst);
+	kernel_offset = gen7_fill_media_kernel(batch, kernel, size);
+
+	idd = batch_alloc(batch, sizeof(*idd), 64);
+	offset = batch_offset(batch, idd);
+
+	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
+
+	idd->desc1.single_program_flow = 1;
+	idd->desc1.floating_point_mode = GEN7_FLOATING_POINT_IEEE_754;
+
+	idd->desc2.sampler_count = 0;      /* 0 samplers used */
+	idd->desc2.sampler_state_pointer = 0;
+
+	idd->desc3.binding_table_entry_count = 0;
+	idd->desc3.binding_table_pointer = (binding_table_offset >> 5);
+
+	idd->desc4.constant_urb_entry_read_offset = 0;
+	idd->desc4.constant_urb_entry_read_length = 1; /* grf 1 */
+
+	return offset;
+}
+
+void
+gen7_emit_state_base_address(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
+
+	/* general */
+	OUT_BATCH(0);
+
+	/* surface */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+
+	/* dynamic */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+
+	/* indirect */
+	OUT_BATCH(0);
+
+	/* instruction */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+
+	/* general/dynamic/indirect/instruction access Bound */
+	OUT_BATCH(0);
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+}
+
+void
+gen7_emit_vfe_state(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
+
+	/* scratch buffer */
+	OUT_BATCH(0);
+
+	/* number of threads & urb entries */
+	OUT_BATCH(1 << 16 |
+		2 << 8);
+
+	OUT_BATCH(0);
+
+	/* urb entry size & curbe size */
+	OUT_BATCH(2 << 16 | 	/* in 256 bits unit */
+		2);		/* in 256 bits unit */
+
+	/* scoreboard */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+void
+gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
+
+	/* scratch buffer */
+	OUT_BATCH(0);
+
+	/* number of threads & urb entries */
+	OUT_BATCH(1 << 16 | /* max num of threads */
+		  0 << 8 | /* num of URB entry */
+		  1 << 2); /* GPGPU mode */
+
+	OUT_BATCH(0);
+
+	/* urb entry size & curbe size */
+	OUT_BATCH(0 << 16 | 	/* URB entry size in 256 bits unit */
+		  1);		/* CURBE entry size in 256 bits unit */
+
+	/* scoreboard */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+void
+gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
+{
+	OUT_BATCH(GEN7_MEDIA_CURBE_LOAD | (4 - 2));
+	OUT_BATCH(0);
+	/* curbe total data length */
+	OUT_BATCH(64);
+	/* curbe data start address, is relative to the dynamics base address */
+	OUT_BATCH(curbe_buffer);
+}
+
+void
+gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
+{
+	OUT_BATCH(GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
+	OUT_BATCH(0);
+	/* interface descriptor data length */
+	OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
+	/* interface descriptor address, is relative to the dynamics base address */
+	OUT_BATCH(interface_descriptor);
+}
+
+void
+gen7_emit_media_objects(struct intel_batchbuffer *batch,
+			unsigned x, unsigned y,
+			unsigned width, unsigned height)
+{
+	int i, j;
+
+	for (i = 0; i < width / 16; i++) {
+		for (j = 0; j < height / 16; j++) {
+			OUT_BATCH(GEN7_MEDIA_OBJECT | (8 - 2));
+
+			/* interface descriptor offset */
+			OUT_BATCH(0);
+
+			/* without indirect data */
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+
+			/* scoreboard */
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+
+			/* inline data (xoffset, yoffset) */
+			OUT_BATCH(x + i * 16);
+			OUT_BATCH(y + j * 16);
+		}
+	}
+}
+
+void
+gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
+		     unsigned x, unsigned y,
+		     unsigned width, unsigned height)
+{
+	uint32_t x_dim, y_dim, tmp, right_mask;
+
+	/*
+	 * Simply do SIMD16 based dispatch, so every thread uses
+	 * SIMD16 channels.
+	 *
+	 * Define our own thread group size, e.g 16x1 for every group, then
+	 * will have 1 thread each group in SIMD16 dispatch. So thread
+	 * width/height/depth are all 1.
+	 *
+	 * Then thread group X = width / 16 (aligned to 16)
+	 * thread group Y = height;
+	 */
+	x_dim = (width + 15) / 16;
+	y_dim = height;
+
+	tmp = width & 15;
+	if (tmp == 0)
+		right_mask = (1 << 16) - 1;
+	else
+		right_mask = (1 << tmp) - 1;
+
+	OUT_BATCH(GEN7_GPGPU_WALKER | 9);
+
+	/* interface descriptor offset */
+	OUT_BATCH(0);
+
+	/* SIMD size, thread w/h/d */
+	OUT_BATCH(1 << 30 | /* SIMD16 */
+		  0 << 16 | /* depth:1 */
+		  0 << 8 | /* height:1 */
+		  0); /* width:1 */
+
+	/* thread group X */
+	OUT_BATCH(0);
+	OUT_BATCH(x_dim);
+
+	/* thread group Y */
+	OUT_BATCH(0);
+	OUT_BATCH(y_dim);
+
+	/* thread group Z */
+	OUT_BATCH(0);
+	OUT_BATCH(1);
+
+	/* right mask */
+	OUT_BATCH(right_mask);
+
+	/* bottom mask, height 1, always 0xffffffff */
+	OUT_BATCH(0xffffffff);
+}
+
+void
+gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
+{
+	int ret;
+
+	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
+	if (ret == 0)
+		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
+					NULL, 0, 0, 0);
+	igt_assert(ret == 0);
+}
+
+
+uint32_t
+gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
+			uint8_t color)
+{
+	uint8_t *curbe_buffer;
+	uint32_t offset;
+
+	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
+	offset = batch_offset(batch, curbe_buffer);
+	*curbe_buffer = color;
+
+	return offset;
+}
+
+uint32_t
+gen8_fill_surface_state(struct intel_batchbuffer *batch,
+			struct igt_buf *buf,
+			uint32_t format,
+			int is_dst)
+{
+	struct gen8_surface_state *ss;
+	uint32_t write_domain, read_domain, offset;
+	int ret;
+
+	if (is_dst) {
+		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
+	} else {
+		write_domain = 0;
+		read_domain = I915_GEM_DOMAIN_SAMPLER;
+	}
+
+	ss = batch_alloc(batch, sizeof(*ss), 64);
+	offset = batch_offset(batch, ss);
+
+	ss->ss0.surface_type = GEN8_SURFACE_2D;
+	ss->ss0.surface_format = format;
+	ss->ss0.render_cache_read_write = 1;
+	ss->ss0.vertical_alignment = 1; /* align 4 */
+	ss->ss0.horizontal_alignment = 1; /* align 4 */
+
+	if (buf->tiling == I915_TILING_X)
+		ss->ss0.tiled_mode = 2;
+	else if (buf->tiling == I915_TILING_Y)
+		ss->ss0.tiled_mode = 3;
+
+	ss->ss8.base_addr = buf->bo->offset;
+
+	ret = drm_intel_bo_emit_reloc(batch->bo,
+				batch_offset(batch, ss) + 8 * 4,
+				buf->bo, 0,
+				read_domain, write_domain);
+	igt_assert(ret == 0);
+
+	ss->ss2.height = igt_buf_height(buf) - 1;
+	ss->ss2.width  = igt_buf_width(buf) - 1;
+	ss->ss3.pitch  = buf->stride - 1;
+
+	ss->ss7.shader_chanel_select_r = 4;
+	ss->ss7.shader_chanel_select_g = 5;
+	ss->ss7.shader_chanel_select_b = 6;
+	ss->ss7.shader_chanel_select_a = 7;
+
+	return offset;
+}
+
+uint32_t
+gen8_fill_binding_table(struct intel_batchbuffer *batch,
+			struct igt_buf *dst)
+{
+	uint32_t *binding_table, offset;
+
+	binding_table = batch_alloc(batch, 32, 64);
+	offset = batch_offset(batch, binding_table);
+
+	binding_table[0] = gen8_fill_surface_state(batch, dst,
+						GEN8_SURFACEFORMAT_R8_UNORM, 1);
+
+	return offset;
+}
+
+uint32_t
+gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,  const uint32_t kernel[][4], size_t size)
+{
+	struct gen8_interface_descriptor_data *idd;
+	uint32_t offset;
+	uint32_t binding_table_offset, kernel_offset;
+
+	binding_table_offset = gen8_fill_binding_table(batch, dst);
+	kernel_offset = gen8_fill_media_kernel(batch, kernel, size);
+
+	idd = batch_alloc(batch, sizeof(*idd), 64);
+	offset = batch_offset(batch, idd);
+
+	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
+
+	idd->desc2.single_program_flow = 1;
+	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
+
+	idd->desc3.sampler_count = 0;      /* 0 samplers used */
+	idd->desc3.sampler_state_pointer = 0;
+
+	idd->desc4.binding_table_entry_count = 0;
+	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
+
+	idd->desc5.constant_urb_entry_read_offset = 0;
+	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
+
+	return offset;
+}
+
+void
+gen8_emit_state_base_address(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2));
+
+	/* general */
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
+
+	/* stateless data port */
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+
+	/* surface */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
+
+	/* dynamic */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
+		0, BASE_ADDRESS_MODIFY);
+
+	/* indirect */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	/* instruction */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+
+	/* general state buffer size */
+	OUT_BATCH(0xfffff000 | 1);
+	/* dynamic state buffer size */
+	OUT_BATCH(1 << 12 | 1);
+	/* indirect object buffer size */
+	OUT_BATCH(0xfffff000 | 1);
+	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
+	OUT_BATCH(1 << 12 | 1);
+}
+
+void
+gen8_emit_vfe_state(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
+
+	/* scratch buffer */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	/* number of threads & urb entries */
+	OUT_BATCH(1 << 16 |
+		2 << 8);
+
+	OUT_BATCH(0);
+
+	/* urb entry size & curbe size */
+	OUT_BATCH(2 << 16 |
+		2);
+
+	/* scoreboard */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+void
+gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
+
+	/* scratch buffer */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	/* number of threads & urb entries */
+	OUT_BATCH(1 << 16 | 1 << 8);
+
+	OUT_BATCH(0);
+
+	/* urb entry size & curbe size */
+	OUT_BATCH(0 << 16 | 1);
+
+	/* scoreboard */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+void
+gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
+{
+	OUT_BATCH(GEN8_MEDIA_CURBE_LOAD | (4 - 2));
+	OUT_BATCH(0);
+	/* curbe total data length */
+	OUT_BATCH(64);
+	/* curbe data start address, relative to the dynamic state base address */
+	OUT_BATCH(curbe_buffer);
+}
+
+void
+gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
+{
+	OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
+	OUT_BATCH(0);
+	/* interface descriptor data length */
+	OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
+	/* interface descriptor address, relative to the dynamic state base address */
+	OUT_BATCH(interface_descriptor);
+}
+
+void
+gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_MEDIA_STATE_FLUSH | (2 - 2));
+	OUT_BATCH(0);
+}
+
+void
+gen8_emit_media_objects(struct intel_batchbuffer *batch,
+			unsigned x, unsigned y,
+			unsigned width, unsigned height)
+{
+	int i, j;
+
+	for (i = 0; i < width / 16; i++) {
+		for (j = 0; j < height / 16; j++) {
+			OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
+
+			/* interface descriptor offset */
+			OUT_BATCH(0);
+
+			/* without indirect data */
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+
+			/* scoreboard */
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+
+			/* inline data (xoffset, yoffset) */
+			OUT_BATCH(x + i * 16);
+			OUT_BATCH(y + j * 16);
+			gen8_emit_media_state_flush(batch);
+		}
+	}
+}
+void
+gen8lp_emit_media_objects(struct intel_batchbuffer *batch,
+			unsigned x, unsigned y,
+			unsigned width, unsigned height)
+{
+	int i, j;
+
+	for (i = 0; i < width / 16; i++) {
+		for (j = 0; j < height / 16; j++) {
+			OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
+
+			/* interface descriptor offset */
+			OUT_BATCH(0);
+
+			/* without indirect data */
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+
+			/* scoreboard */
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+
+			/* inline data (xoffset, yoffset) */
+			OUT_BATCH(x + i * 16);
+			OUT_BATCH(y + j * 16);
+		}
+	}
+}
+void
+gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
+		     unsigned x, unsigned y,
+		     unsigned width, unsigned height)
+{
+	uint32_t x_dim, y_dim, tmp, right_mask;
+
+	/*
+	 * Simply do SIMD16 based dispatch, so every thread uses
+	 * SIMD16 channels.
+	 *
+	 * Define our own thread group size, e.g. 16x1 for every group, so
+	 * that each group has 1 thread in SIMD16 dispatch. The thread
+	 * width/height/depth are therefore all 1.
+	 *
+	 * Then thread group X = width / 16 (rounded up),
+	 * thread group Y = height.
+	 */
+	x_dim = (width + 15) / 16;
+	y_dim = height;
+
+	tmp = width & 15;
+	if (tmp == 0)
+		right_mask = (1 << 16) - 1;
+	else
+		right_mask = (1 << tmp) - 1;
+
+	OUT_BATCH(GEN7_GPGPU_WALKER | 13);
+
+	OUT_BATCH(0); /* kernel offset */
+	OUT_BATCH(0); /* indirect data length */
+	OUT_BATCH(0); /* indirect data offset */
+
+	/* SIMD size, thread w/h/d */
+	OUT_BATCH(1 << 30 | /* SIMD16 */
+		  0 << 16 | /* depth:1 */
+		  0 << 8 | /* height:1 */
+		  0); /* width:1 */
+
+	/* thread group X */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(x_dim);
+
+	/* thread group Y */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(y_dim);
+
+	/* thread group Z */
+	OUT_BATCH(0);
+	OUT_BATCH(1);
+
+	/* right mask */
+	OUT_BATCH(right_mask);
+
+	/* bottom mask, height 1, always 0xffffffff */
+	OUT_BATCH(0xffffffff);
+}
+
+void
+gen9_emit_state_base_address(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (19 - 2));
+
+	/* general */
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
+
+	/* stateless data port */
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+
+	/* surface */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
+
+	/* dynamic */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
+		0, BASE_ADDRESS_MODIFY);
+
+	/* indirect */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	/* instruction */
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+
+	/* general state buffer size */
+	OUT_BATCH(0xfffff000 | 1);
+	/* dynamic state buffer size */
+	OUT_BATCH(1 << 12 | 1);
+	/* indirect object buffer size */
+	OUT_BATCH(0xfffff000 | 1);
+	/* instruction buffer size; the modify enable bit must be set, otherwise it may result in a GPU hang */
+	OUT_BATCH(1 << 12 | 1);
+
+	/* Bindless surface state base address */
+	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
+	OUT_BATCH(0xfffff000);
+}
diff --git a/lib/gpu_fill.h b/lib/gpu_fill.h
new file mode 100644
index 00000000..b20430b1
--- /dev/null
+++ b/lib/gpu_fill.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+#ifndef GPU_FILL_H
+#define GPU_FILL_H
+
+#include <intel_bufmgr.h>
+#include <i915_drm.h>
+
+#include "media_fill.h"
+#include "gen7_media.h"
+#include "gen8_media.h"
+#include "intel_reg.h"
+#include "drmtest.h"
+#include "intel_batchbuffer.h"
+#include "intel_chipset.h"
+#include <assert.h>
+
+uint32_t
+batch_used(struct intel_batchbuffer *batch);
+
+uint32_t
+batch_align(struct intel_batchbuffer *batch, uint32_t align);
+
+void *
+batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align);
+
+uint32_t
+batch_offset(struct intel_batchbuffer *batch, void *ptr);
+
+uint32_t
+batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align);
+
+void
+gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end);
+
+uint32_t
+gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
+			uint8_t color);
+
+uint32_t
+gen7_fill_surface_state(struct intel_batchbuffer *batch,
+			struct igt_buf *buf,
+			uint32_t format,
+			int is_dst);
+
+uint32_t
+gen7_fill_binding_table(struct intel_batchbuffer *batch,
+			struct igt_buf *dst);
+
+uint32_t
+gen7_fill_media_kernel(struct intel_batchbuffer *batch,
+		const uint32_t kernel[][4],
+		size_t size);
+
+uint32_t
+gen8_fill_media_kernel(struct intel_batchbuffer *batch,
+		const uint32_t kernel[][4],
+		size_t size);
+
+uint32_t
+gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,
+			       const uint32_t kernel[][4], size_t size);
+
+void
+gen7_emit_state_base_address(struct intel_batchbuffer *batch);
+
+void
+gen7_emit_vfe_state(struct intel_batchbuffer *batch);
+
+void
+gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
+
+void
+gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer);
+
+void
+gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor);
+
+void
+gen7_emit_media_objects(struct intel_batchbuffer *batch,
+			unsigned x, unsigned y,
+			unsigned width, unsigned height);
+
+void
+gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
+		     unsigned x, unsigned y,
+		     unsigned width, unsigned height);
+
+void
+gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end);
+
+uint32_t
+gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
+			uint8_t color);
+
+uint32_t
+gen8_fill_surface_state(struct intel_batchbuffer *batch,
+			struct igt_buf *buf,
+			uint32_t format,
+			int is_dst);
+
+uint32_t
+gen8_fill_binding_table(struct intel_batchbuffer *batch,
+			struct igt_buf *dst);
+
+uint32_t
+gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,  const uint32_t kernel[][4], size_t size);
+
+void
+gen8_emit_state_base_address(struct intel_batchbuffer *batch);
+
+void
+gen8_emit_vfe_state(struct intel_batchbuffer *batch);
+
+void
+gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
+
+void
+gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer);
+
+void
+gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor);
+
+void
+gen8_emit_media_state_flush(struct intel_batchbuffer *batch);
+
+void
+gen8_emit_media_objects(struct intel_batchbuffer *batch,
+			unsigned x, unsigned y,
+			unsigned width, unsigned height);
+
+void
+gen8lp_emit_media_objects(struct intel_batchbuffer *batch,
+			unsigned x, unsigned y,
+			unsigned width, unsigned height);
+
+void
+gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
+		     unsigned x, unsigned y,
+		     unsigned width, unsigned height);
+
+void
+gen9_emit_state_base_address(struct intel_batchbuffer *batch);
+
+#endif /* GPU_FILL_H */
diff --git a/lib/media_fill_gen7.c b/lib/media_fill_gen7.c
index 6fb44798..c97555a6 100644
--- a/lib/media_fill_gen7.c
+++ b/lib/media_fill_gen7.c
@@ -5,7 +5,7 @@
 #include "gen7_media.h"
 #include "intel_reg.h"
 #include "drmtest.h"
-
+#include "gpu_fill.h"
 #include <assert.h>
 
 static const uint32_t media_kernel[][4] = {
@@ -22,275 +22,6 @@ static const uint32_t media_kernel[][4] = {
 	{ 0x07800031, 0x20001ca8, 0x00000e00, 0x82000010 },
 };
 
-static uint32_t
-batch_used(struct intel_batchbuffer *batch)
-{
-	return batch->ptr - batch->buffer;
-}
-
-static uint32_t
-batch_align(struct intel_batchbuffer *batch, uint32_t align)
-{
-	uint32_t offset = batch_used(batch);
-	offset = ALIGN(offset, align);
-	batch->ptr = batch->buffer + offset;
-	return offset;
-}
-
-static void *
-batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
-{
-	uint32_t offset = batch_align(batch, align);
-	batch->ptr += size;
-	return memset(batch->buffer + offset, 0, size);
-}
-
-static uint32_t
-batch_offset(struct intel_batchbuffer *batch, void *ptr)
-{
-	return (uint8_t *)ptr - batch->buffer;
-}
-
-static uint32_t
-batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
-{
-	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
-}
-
-static void
-gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
-{
-	int ret;
-
-	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
-	if (ret == 0)
-		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
-					NULL, 0, 0, 0);
-	igt_assert(ret == 0);
-}
-
-static uint32_t
-gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			uint8_t color)
-{
-	uint8_t *curbe_buffer;
-	uint32_t offset;
-
-	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
-	offset = batch_offset(batch, curbe_buffer);
-	*curbe_buffer = color;
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_surface_state(struct intel_batchbuffer *batch,
-			struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
-{
-	struct gen7_surface_state *ss;
-	uint32_t write_domain, read_domain, offset;
-	int ret;
-
-	if (is_dst) {
-		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
-	} else {
-		write_domain = 0;
-		read_domain = I915_GEM_DOMAIN_SAMPLER;
-	}
-
-	ss = batch_alloc(batch, sizeof(*ss), 64);
-	offset = batch_offset(batch, ss);
-
-	ss->ss0.surface_type = GEN7_SURFACE_2D;
-	ss->ss0.surface_format = format;
-	ss->ss0.render_cache_read_write = 1;
-
-	if (buf->tiling == I915_TILING_X)
-		ss->ss0.tiled_mode = 2;
-	else if (buf->tiling == I915_TILING_Y)
-		ss->ss0.tiled_mode = 3;
-
-	ss->ss1.base_addr = buf->bo->offset;
-	ret = drm_intel_bo_emit_reloc(batch->bo,
-				batch_offset(batch, ss) + 4,
-				buf->bo, 0,
-				read_domain, write_domain);
-	igt_assert(ret == 0);
-
-	ss->ss2.height = igt_buf_height(buf) - 1;
-	ss->ss2.width  = igt_buf_width(buf) - 1;
-
-	ss->ss3.pitch  = buf->stride - 1;
-
-	ss->ss7.shader_chanel_select_r = 4;
-	ss->ss7.shader_chanel_select_g = 5;
-	ss->ss7.shader_chanel_select_b = 6;
-	ss->ss7.shader_chanel_select_a = 7;
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = batch_alloc(batch, 32, 64);
-	offset = batch_offset(batch, binding_table);
-
-	binding_table[0] = gen7_fill_surface_state(batch, dst, GEN7_SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_media_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size)
-{
-	uint32_t offset;
-
-	offset = batch_copy(batch, kernel, size, 64);
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,
-			       const uint32_t kernel[][4], size_t size)
-{
-	struct gen7_interface_descriptor_data *idd;
-	uint32_t offset;
-	uint32_t binding_table_offset, kernel_offset;
-
-	binding_table_offset = gen7_fill_binding_table(batch, dst);
-	kernel_offset = gen7_fill_media_kernel(batch, kernel, size);
-
-	idd = batch_alloc(batch, sizeof(*idd), 64);
-	offset = batch_offset(batch, idd);
-
-	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
-
-	idd->desc1.single_program_flow = 1;
-	idd->desc1.floating_point_mode = GEN7_FLOATING_POINT_IEEE_754;
-
-	idd->desc2.sampler_count = 0;      /* 0 samplers used */
-	idd->desc2.sampler_state_pointer = 0;
-
-	idd->desc3.binding_table_entry_count = 0;
-	idd->desc3.binding_table_pointer = (binding_table_offset >> 5);
-
-	idd->desc4.constant_urb_entry_read_offset = 0;
-	idd->desc4.constant_urb_entry_read_length = 1; /* grf 1 */
-
-	return offset;
-}
-
-static void
-gen7_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
-
-	/* general */
-	OUT_BATCH(0);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* general/dynamic/indirect/instruction access Bound */
-	OUT_BATCH(0);
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-}
-
-static void
-gen7_emit_vfe_state(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 |
-		2 << 8);
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(2 << 16 | 	/* in 256 bits unit */
-		2);		/* in 256 bits unit */
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-static void
-gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
-{
-	OUT_BATCH(GEN7_MEDIA_CURBE_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* curbe total data length */
-	OUT_BATCH(64);
-	/* curbe data start address, is relative to the dynamics base address */
-	OUT_BATCH(curbe_buffer);
-}
-
-static void
-gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
-	OUT_BATCH(interface_descriptor);
-}
-
-static void
-gen7_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height)
-{
-	int i, j;
-
-	for (i = 0; i < width / 16; i++) {
-		for (j = 0; j < height / 16; j++) {
-			OUT_BATCH(GEN7_MEDIA_OBJECT | (8 - 2));
-
-			/* interface descriptor offset */
-			OUT_BATCH(0);
-
-			/* without indirect data */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* scoreboard */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* inline data (xoffset, yoffset) */
-			OUT_BATCH(x + i * 16);
-			OUT_BATCH(y + j * 16);
-		}
-	}
-}
-
 /*
  * This sets up the media pipeline,
  *
diff --git a/lib/media_fill_gen8.c b/lib/media_fill_gen8.c
index 4a8fe5a2..4270997e 100644
--- a/lib/media_fill_gen8.c
+++ b/lib/media_fill_gen8.c
@@ -5,7 +5,7 @@
 #include "gen8_media.h"
 #include "intel_reg.h"
 #include "drmtest.h"
-
+#include "gpu_fill.h"
 #include <assert.h>
 
 
@@ -23,293 +23,7 @@ static const uint32_t media_kernel[][4] = {
 	{ 0x07800031, 0x20000a40, 0x0e000e00, 0x82000010 },
 };
 
-static uint32_t
-batch_used(struct intel_batchbuffer *batch)
-{
-	return batch->ptr - batch->buffer;
-}
-
-static uint32_t
-batch_align(struct intel_batchbuffer *batch, uint32_t align)
-{
-	uint32_t offset = batch_used(batch);
-	offset = ALIGN(offset, align);
-	batch->ptr = batch->buffer + offset;
-	return offset;
-}
-
-static void *
-batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
-{
-	uint32_t offset = batch_align(batch, align);
-	batch->ptr += size;
-	return memset(batch->buffer + offset, 0, size);
-}
-
-static uint32_t
-batch_offset(struct intel_batchbuffer *batch, void *ptr)
-{
-	return (uint8_t *)ptr - batch->buffer;
-}
-
-static uint32_t
-batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
-{
-	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
-}
-
-static void
-gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
-{
-	int ret;
-
-	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
-	if (ret == 0)
-		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
-					NULL, 0, 0, 0);
-	igt_assert(ret == 0);
-}
-
-static uint32_t
-gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			uint8_t color)
-{
-	uint8_t *curbe_buffer;
-	uint32_t offset;
-
-	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
-	offset = batch_offset(batch, curbe_buffer);
-	*curbe_buffer = color;
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_surface_state(struct intel_batchbuffer *batch,
-			struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
-{
-	struct gen8_surface_state *ss;
-	uint32_t write_domain, read_domain, offset;
-	int ret;
-
-	if (is_dst) {
-		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
-	} else {
-		write_domain = 0;
-		read_domain = I915_GEM_DOMAIN_SAMPLER;
-	}
-
-	ss = batch_alloc(batch, sizeof(*ss), 64);
-	offset = batch_offset(batch, ss);
-
-	ss->ss0.surface_type = GEN8_SURFACE_2D;
-	ss->ss0.surface_format = format;
-	ss->ss0.render_cache_read_write = 1;
-	ss->ss0.vertical_alignment = 1; /* align 4 */
-	ss->ss0.horizontal_alignment = 1; /* align 4 */
-
-	if (buf->tiling == I915_TILING_X)
-		ss->ss0.tiled_mode = 2;
-	else if (buf->tiling == I915_TILING_Y)
-		ss->ss0.tiled_mode = 3;
-
-	ss->ss8.base_addr = buf->bo->offset;
-
-	ret = drm_intel_bo_emit_reloc(batch->bo,
-				batch_offset(batch, ss) + 8 * 4,
-				buf->bo, 0,
-				read_domain, write_domain);
-	igt_assert(ret == 0);
-
-	ss->ss2.height = igt_buf_height(buf) - 1;
-	ss->ss2.width  = igt_buf_width(buf) - 1;
-	ss->ss3.pitch  = buf->stride - 1;
-
-	ss->ss7.shader_chanel_select_r = 4;
-	ss->ss7.shader_chanel_select_g = 5;
-	ss->ss7.shader_chanel_select_b = 6;
-	ss->ss7.shader_chanel_select_a = 7;
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = batch_alloc(batch, 32, 64);
-	offset = batch_offset(batch, binding_table);
-
-	binding_table[0] = gen8_fill_surface_state(batch, dst, GEN8_SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_media_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size)
-{
-	uint32_t offset;
-
-	offset = batch_copy(batch, kernel, size, 64);
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst)
-{
-	struct gen8_interface_descriptor_data *idd;
-	uint32_t offset;
-	uint32_t binding_table_offset, kernel_offset;
-
-	binding_table_offset = gen8_fill_binding_table(batch, dst);
-	kernel_offset = gen8_fill_media_kernel(batch, media_kernel, sizeof(media_kernel));
-
-	idd = batch_alloc(batch, sizeof(*idd), 64);
-	offset = batch_offset(batch, idd);
-
-	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
-
-	idd->desc2.single_program_flow = 1;
-	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
-
-	idd->desc3.sampler_count = 0;      /* 0 samplers used */
-	idd->desc3.sampler_state_pointer = 0;
-
-	idd->desc4.binding_table_entry_count = 0;
-	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
-
-	idd->desc5.constant_urb_entry_read_offset = 0;
-	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
-
-	return offset;
-}
-
-static void
-gen8_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2));
-
-	/* general */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-
-	/* stateless data port */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
-		0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* general state buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* dynamic state buffer size */
-	OUT_BATCH(1 << 12 | 1);
-	/* indirect object buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
-	OUT_BATCH(1 << 12 | 1);
-}
-
-static void
-gen8_emit_vfe_state(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 |
-		2 << 8);
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(2 << 16 |
-		2);
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-static void
-gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
-{
-	OUT_BATCH(GEN8_MEDIA_CURBE_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* curbe total data length */
-	OUT_BATCH(64);
-	/* curbe data start address, is relative to the dynamics base address */
-	OUT_BATCH(curbe_buffer);
-}
 
-static void
-gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
-	OUT_BATCH(interface_descriptor);
-}
-
-static void
-gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_STATE_FLUSH | (2 - 2));
-	OUT_BATCH(0);
-}
-
-static void
-gen8_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height)
-{
-	int i, j;
-
-	for (i = 0; i < width / 16; i++) {
-		for (j = 0; j < height / 16; j++) {
-			OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
-
-			/* interface descriptor offset */
-			OUT_BATCH(0);
-
-			/* without indirect data */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* scoreboard */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* inline data (xoffset, yoffset) */
-			OUT_BATCH(x + i * 16);
-			OUT_BATCH(y + j * 16);
-			gen8_emit_media_state_flush(batch);
-		}
-	}
-}
 
 /*
  * This sets up the media pipeline,
@@ -349,7 +63,7 @@ gen8_media_fillfunc(struct intel_batchbuffer *batch,
 	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
 
 	curbe_buffer = gen8_fill_curbe_buffer_data(batch, color);
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst);
+	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
 	/* media pipeline */
diff --git a/lib/media_fill_gen8lp.c b/lib/media_fill_gen8lp.c
index 1f8a4adc..dcc11982 100644
--- a/lib/media_fill_gen8lp.c
+++ b/lib/media_fill_gen8lp.c
@@ -5,7 +5,7 @@
 #include "gen8_media.h"
 #include "intel_reg.h"
 #include "drmtest.h"
-
+#include "gpu_fill.h"
 #include <assert.h>
 
 
@@ -23,286 +23,6 @@ static const uint32_t media_kernel[][4] = {
 	{ 0x07800031, 0x20000a40, 0x0e000e00, 0x82000010 },
 };
 
-static uint32_t
-batch_used(struct intel_batchbuffer *batch)
-{
-	return batch->ptr - batch->buffer;
-}
-
-static uint32_t
-batch_align(struct intel_batchbuffer *batch, uint32_t align)
-{
-	uint32_t offset = batch_used(batch);
-	offset = ALIGN(offset, align);
-	batch->ptr = batch->buffer + offset;
-	return offset;
-}
-
-static void *
-batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
-{
-	uint32_t offset = batch_align(batch, align);
-	batch->ptr += size;
-	return memset(batch->buffer + offset, 0, size);
-}
-
-static uint32_t
-batch_offset(struct intel_batchbuffer *batch, void *ptr)
-{
-	return (uint8_t *)ptr - batch->buffer;
-}
-
-static uint32_t
-batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
-{
-	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
-}
-
-static void
-gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
-{
-	int ret;
-
-	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
-	if (ret == 0)
-		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
-					NULL, 0, 0, 0);
-	igt_assert(ret == 0);
-}
-
-static uint32_t
-gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			uint8_t color)
-{
-	uint8_t *curbe_buffer;
-	uint32_t offset;
-
-	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
-	offset = batch_offset(batch, curbe_buffer);
-	*curbe_buffer = color;
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_surface_state(struct intel_batchbuffer *batch,
-			struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
-{
-	struct gen8_surface_state *ss;
-	uint32_t write_domain, read_domain, offset;
-	int ret;
-
-	if (is_dst) {
-		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
-	} else {
-		write_domain = 0;
-		read_domain = I915_GEM_DOMAIN_SAMPLER;
-	}
-
-	ss = batch_alloc(batch, sizeof(*ss), 64);
-	offset = batch_offset(batch, ss);
-
-	ss->ss0.surface_type = GEN8_SURFACE_2D;
-	ss->ss0.surface_format = format;
-	ss->ss0.render_cache_read_write = 1;
-	ss->ss0.vertical_alignment = 1; /* align 4 */
-	ss->ss0.horizontal_alignment = 1; /* align 4 */
-
-	if (buf->tiling == I915_TILING_X)
-		ss->ss0.tiled_mode = 2;
-	else if (buf->tiling == I915_TILING_Y)
-		ss->ss0.tiled_mode = 3;
-
-	ss->ss8.base_addr = buf->bo->offset;
-
-	ret = drm_intel_bo_emit_reloc(batch->bo,
-				batch_offset(batch, ss) + 8 * 4,
-				buf->bo, 0,
-				read_domain, write_domain);
-	igt_assert(ret == 0);
-
-	ss->ss2.height = igt_buf_height(buf) - 1;
-	ss->ss2.width  = igt_buf_width(buf) - 1;
-	ss->ss3.pitch  = buf->stride - 1;
-
-	ss->ss7.shader_chanel_select_r = 4;
-	ss->ss7.shader_chanel_select_g = 5;
-	ss->ss7.shader_chanel_select_b = 6;
-	ss->ss7.shader_chanel_select_a = 7;
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = batch_alloc(batch, 32, 64);
-	offset = batch_offset(batch, binding_table);
-
-	binding_table[0] = gen8_fill_surface_state(batch, dst, GEN8_SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_media_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size)
-{
-	uint32_t offset;
-
-	offset = batch_copy(batch, kernel, size, 64);
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst)
-{
-	struct gen8_interface_descriptor_data *idd;
-	uint32_t offset;
-	uint32_t binding_table_offset, kernel_offset;
-
-	binding_table_offset = gen8_fill_binding_table(batch, dst);
-	kernel_offset = gen8_fill_media_kernel(batch, media_kernel, sizeof(media_kernel));
-
-	idd = batch_alloc(batch, sizeof(*idd), 64);
-	offset = batch_offset(batch, idd);
-
-	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
-
-	idd->desc2.single_program_flow = 1;
-	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
-
-	idd->desc3.sampler_count = 0;      /* 0 samplers used */
-	idd->desc3.sampler_state_pointer = 0;
-
-	idd->desc4.binding_table_entry_count = 0;
-	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
-
-	idd->desc5.constant_urb_entry_read_offset = 0;
-	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
-
-	return offset;
-}
-
-static void
-gen8_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2));
-
-	/* general */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-
-	/* stateless data port */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
-		0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* general state buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* dynamic state buffer size */
-	OUT_BATCH(1 << 12 | 1);
-	/* indirect object buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
-	OUT_BATCH(1 << 12 | 1);
-}
-
-static void
-gen8_emit_vfe_state(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 |
-		2 << 8);
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(2 << 16 |
-		2);
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-static void
-gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
-{
-	OUT_BATCH(GEN8_MEDIA_CURBE_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* curbe total data length */
-	OUT_BATCH(64);
-	/* curbe data start address, is relative to the dynamics base address */
-	OUT_BATCH(curbe_buffer);
-}
-
-static void
-gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
-	OUT_BATCH(interface_descriptor);
-}
-
-static void
-gen8lp_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height)
-{
-	int i, j;
-
-	for (i = 0; i < width / 16; i++) {
-		for (j = 0; j < height / 16; j++) {
-			OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
-
-			/* interface descriptor offset */
-			OUT_BATCH(0);
-
-			/* without indirect data */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* scoreboard */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* inline data (xoffset, yoffset) */
-			OUT_BATCH(x + i * 16);
-			OUT_BATCH(y + j * 16);
-		}
-	}
-}
-
 /*
  * This sets up the media pipeline,
  *
@@ -341,7 +61,7 @@ gen8lp_media_fillfunc(struct intel_batchbuffer *batch,
 	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
 
 	curbe_buffer = gen8_fill_curbe_buffer_data(batch, color);
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst);
+	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
 	/* media pipeline */
diff --git a/lib/media_fill_gen9.c b/lib/media_fill_gen9.c
index 3fd21819..6accdbe4 100644
--- a/lib/media_fill_gen9.c
+++ b/lib/media_fill_gen9.c
@@ -4,11 +4,9 @@
 #include "media_fill.h"
 #include "gen8_media.h"
 #include "intel_reg.h"
-
+#include "gpu_fill.h"
 #include <assert.h>
 
-#define ALIGN(x, y) (((x) + (y)-1) & ~((y)-1))
-
 static const uint32_t media_kernel[][4] = {
 	{ 0x00400001, 0x20202288, 0x00000020, 0x00000000 },
 	{ 0x00600001, 0x20800208, 0x008d0000, 0x00000000 },
@@ -23,298 +21,6 @@ static const uint32_t media_kernel[][4] = {
 	{ 0x07800031, 0x20000a40, 0x0e000e00, 0x82000010 },
 };
 
-static uint32_t
-batch_used(struct intel_batchbuffer *batch)
-{
-	return batch->ptr - batch->buffer;
-}
-
-static uint32_t
-batch_align(struct intel_batchbuffer *batch, uint32_t align)
-{
-	uint32_t offset = batch_used(batch);
-	offset = ALIGN(offset, align);
-	batch->ptr = batch->buffer + offset;
-	return offset;
-}
-
-static void *
-batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
-{
-	uint32_t offset = batch_align(batch, align);
-	batch->ptr += size;
-	return memset(batch->buffer + offset, 0, size);
-}
-
-static uint32_t
-batch_offset(struct intel_batchbuffer *batch, void *ptr)
-{
-	return (uint8_t *)ptr - batch->buffer;
-}
-
-static uint32_t
-batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
-{
-	return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
-}
-
-static void
-gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
-{
-	int ret;
-
-	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
-	if (ret == 0)
-		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
-					NULL, 0, 0, 0);
-	assert(ret == 0);
-}
-
-static uint32_t
-gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			uint8_t color)
-{
-	uint8_t *curbe_buffer;
-	uint32_t offset;
-
-	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
-	offset = batch_offset(batch, curbe_buffer);
-	*curbe_buffer = color;
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_surface_state(struct intel_batchbuffer *batch,
-			struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
-{
-	struct gen8_surface_state *ss;
-	uint32_t write_domain, read_domain, offset;
-	int ret;
-
-	if (is_dst) {
-		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
-	} else {
-		write_domain = 0;
-		read_domain = I915_GEM_DOMAIN_SAMPLER;
-	}
-
-	ss = batch_alloc(batch, sizeof(*ss), 64);
-	offset = batch_offset(batch, ss);
-
-	ss->ss0.surface_type = GEN8_SURFACE_2D;
-	ss->ss0.surface_format = format;
-	ss->ss0.render_cache_read_write = 1;
-	ss->ss0.vertical_alignment = 1; /* align 4 */
-	ss->ss0.horizontal_alignment = 1; /* align 4 */
-
-	if (buf->tiling == I915_TILING_X)
-		ss->ss0.tiled_mode = 2;
-	else if (buf->tiling == I915_TILING_Y)
-		ss->ss0.tiled_mode = 3;
-
-	ss->ss8.base_addr = buf->bo->offset;
-
-	ret = drm_intel_bo_emit_reloc(batch->bo,
-				batch_offset(batch, ss) + 8 * 4,
-				buf->bo, 0,
-				read_domain, write_domain);
-	assert(ret == 0);
-
-	ss->ss2.height = igt_buf_height(buf) - 1;
-	ss->ss2.width  = igt_buf_width(buf) - 1;
-	ss->ss3.pitch  = buf->stride - 1;
-
-	ss->ss7.shader_chanel_select_r = 4;
-	ss->ss7.shader_chanel_select_g = 5;
-	ss->ss7.shader_chanel_select_b = 6;
-	ss->ss7.shader_chanel_select_a = 7;
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = batch_alloc(batch, 32, 64);
-	offset = batch_offset(batch, binding_table);
-
-	binding_table[0] = gen8_fill_surface_state(batch, dst, GEN8_SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_media_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size)
-{
-	uint32_t offset;
-
-	offset = batch_copy(batch, kernel, size, 64);
-
-	return offset;
-}
-
-static uint32_t
-gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst)
-{
-	struct gen8_interface_descriptor_data *idd;
-	uint32_t offset;
-	uint32_t binding_table_offset, kernel_offset;
-
-	binding_table_offset = gen8_fill_binding_table(batch, dst);
-	kernel_offset = gen8_fill_media_kernel(batch, media_kernel, sizeof(media_kernel));
-
-	idd = batch_alloc(batch, sizeof(*idd), 64);
-	offset = batch_offset(batch, idd);
-
-	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
-
-	idd->desc2.single_program_flow = 1;
-	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
-
-	idd->desc3.sampler_count = 0;      /* 0 samplers used */
-	idd->desc3.sampler_state_pointer = 0;
-
-	idd->desc4.binding_table_entry_count = 0;
-	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
-
-	idd->desc5.constant_urb_entry_read_offset = 0;
-	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
-
-	return offset;
-}
-
-static void
-gen9_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (19 - 2));
-
-	/* general */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-
-	/* stateless data port */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
-		0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
-
-	/* general state buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* dynamic state buffer size */
-	OUT_BATCH(1 << 12 | 1);
-	/* indirect object buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
-	OUT_BATCH(1 << 12 | 1);
-
-	/* Bindless surface state base address */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-	OUT_BATCH(0xfffff000);
-}
-
-static void
-gen8_emit_vfe_state(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 |
-		2 << 8);
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(2 << 16 |
-		2);
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-static void
-gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
-{
-	OUT_BATCH(GEN8_MEDIA_CURBE_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* curbe total data length */
-	OUT_BATCH(64);
-	/* curbe data start address, is relative to the dynamics base address */
-	OUT_BATCH(curbe_buffer);
-}
-
-static void
-gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
-	OUT_BATCH(interface_descriptor);
-}
-
-static void
-gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_STATE_FLUSH | (2 - 2));
-	OUT_BATCH(0);
-}
-
-static void
-gen8_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height)
-{
-	int i, j;
-
-	for (i = 0; i < width / 16; i++) {
-		for (j = 0; j < height / 16; j++) {
-			OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
-
-			/* interface descriptor offset */
-			OUT_BATCH(0);
-
-			/* without indirect data */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* scoreboard */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* inline data (xoffset, yoffset) */
-			OUT_BATCH(x + i * 16);
-			OUT_BATCH(y + j * 16);
-			gen8_emit_media_state_flush(batch);
-		}
-	}
-}
 
 /*
  * This sets up the media pipeline,
@@ -354,7 +60,7 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
 	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
 
 	curbe_buffer = gen8_fill_curbe_buffer_data(batch, color);
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst);
+	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
 	assert(batch->ptr < &batch->buffer[4095]);
 
 	/* media pipeline */
diff --git a/lib/meson.build b/lib/meson.build
index b3b8b14a..385e08b9 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -30,6 +30,7 @@ lib_sources = [
 	'media_fill_gen9.c',
 	'media_spin.c',
 	'gpgpu_fill.c',
+	'gpu_fill.c',
 	'rendercopy_i915.c',
 	'rendercopy_i830.c',
 	'rendercopy_gen6.c',
-- 
2.14.3

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [igt-dev] [PATCH i-g-t v6 2/4] lib: Remove duplications in gpu_fill library
  2018-04-10 11:28 ` [igt-dev] [PATCH i-g-t v2] lib/gen6_render: Refactoring lib Lukasz Kalamarz
  2018-04-10 13:28   ` Katarzyna Dec
  2018-04-10 14:19   ` [igt-dev] [PATCH i-g-t v6 1/4] lib: Move common gpgpu/media fill functions to gpu_fill library Katarzyna Dec
@ 2018-04-10 14:19   ` Katarzyna Dec
  2018-04-10 14:19   ` [igt-dev] [PATCH i-g-t v6 3/4] lib/gpgpu_fill: Add missing configuration parameters for gpgpu_fill Katarzyna Dec
  2018-04-10 14:19   ` [igt-dev] [PATCH i-g-t v6 4/4] lib: Adjust refactored gpu_fill library to our coding style Katarzyna Dec
  4 siblings, 0 replies; 28+ messages in thread
From: Katarzyna Dec @ 2018-04-10 14:19 UTC (permalink / raw)
  To: igt-dev

After moving all functions needed for gpgpu and media fill testing
there are a lot of duplications which can be removed:
  Library media_fill_gen8 and media_fill_gen8lp for CHT was removed,
media state flush for !CHT was added to gen7_emit_media_objects.
  Many gen8 functions were replaced with gen7 version with devid
parameter (gen7_fill_curbe_load, gen7_emit_interface_descriptor,
gen7_fill_binding_table, gen7_emit_media_objects). Unified fill kernel
function so it is applicable to all gens and both media and gpgpu
(merged gen7_fill_media_kernel and gen8_fill_media_kernel).
  Duplicated constants like GEN8_MEDIA_VFE_STATE, GEN8_MEDIA_CURBE_LOAD,
GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD, GEN8_MEDIA_OBJECT were
replaced by GEN7 version. However, these constants were not removed
from gen8_media.h library, because they are used by other tests
for Gen8+. More refactoring in these gen*_media.h libraries is needed.

It seems that further unification of *_fillfunc functions will
introduce more confusion in understanding what the tests are doing
and what changed between Gens.

v2: Moved some redundant changes from Move gpgpu/media fill to gpu_fill...
to this patch. Applied comments from review.

v3: rebase

Signed-off-by: Katarzyna Dec <katarzyna.dec@intel.com>
Cc: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 lib/Makefile.sources    |   1 -
 lib/gpgpu_fill.c        |   2 +-
 lib/gpu_fill.c          | 172 +++++++-----------------------------------------
 lib/gpu_fill.h          |  38 +----------
 lib/intel_batchbuffer.c |   4 +-
 lib/media_fill.h        |   7 --
 lib/media_fill_gen8.c   |  10 +--
 lib/media_fill_gen8lp.c |  87 ------------------------
 lib/media_fill_gen9.c   |  10 +--
 lib/meson.build         |   1 -
 10 files changed, 39 insertions(+), 293 deletions(-)
 delete mode 100644 lib/media_fill_gen8lp.c

diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index 45e65dd7..9c0150c1 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -58,7 +58,6 @@ lib_source_list =	 	\
 	media_fill.h            \
 	media_fill_gen7.c       \
 	media_fill_gen8.c       \
-	media_fill_gen8lp.c     \
 	media_fill_gen9.c       \
 	media_spin.h		\
 	media_spin.c		\
diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
index f2765fd6..579ce78d 100644
--- a/lib/gpgpu_fill.c
+++ b/lib/gpgpu_fill.c
@@ -180,7 +180,7 @@ gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 	gen8_emit_state_base_address(batch);
 	gen8_emit_vfe_state_gpgpu(batch);
 	gen7_emit_curbe_load(batch, curbe_buffer);
-	gen8_emit_interface_descriptor_load(batch, interface_descriptor);
+	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
 	gen8_emit_gpgpu_walk(batch, x, y, width, height);
 
 	OUT_BATCH(MI_BATCH_BUFFER_END);
diff --git a/lib/gpu_fill.c b/lib/gpu_fill.c
index 172c6db6..7d99dfd9 100644
--- a/lib/gpu_fill.c
+++ b/lib/gpu_fill.c
@@ -118,26 +118,18 @@ gen7_fill_binding_table(struct intel_batchbuffer *batch,
 
 	binding_table = batch_alloc(batch, 32, 64);
 	offset = batch_offset(batch, binding_table);
-	binding_table[0] = gen7_fill_surface_state(batch, dst,
+	if (IS_GEN7(batch->devid))
+		binding_table[0] = gen7_fill_surface_state(batch, dst,
 						GEN7_SURFACEFORMAT_R8_UNORM, 1);
+	else
+		binding_table[0] = gen8_fill_surface_state(batch, dst,
+						GEN8_SURFACEFORMAT_R8_UNORM, 1);
 
 	return offset;
 }
 
 uint32_t
-gen7_fill_media_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size)
-{
-	uint32_t offset;
-
-	offset = batch_copy(batch, kernel, size, 64);
-
-	return offset;
-}
-
-uint32_t
-gen8_fill_media_kernel(struct intel_batchbuffer *batch,
+gen7_fill_kernel(struct intel_batchbuffer *batch,
 		const uint32_t kernel[][4],
 		size_t size)
 {
@@ -157,7 +149,7 @@ gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *
 	uint32_t binding_table_offset, kernel_offset;
 
 	binding_table_offset = gen7_fill_binding_table(batch, dst);
-	kernel_offset = gen7_fill_media_kernel(batch, kernel, size);
+	kernel_offset = gen7_fill_kernel(batch, kernel, size);
 
 	idd = batch_alloc(batch, sizeof(*idd), 64);
 	offset = batch_offset(batch, idd);
@@ -272,7 +264,10 @@ gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t in
 	OUT_BATCH(GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
 	OUT_BATCH(0);
 	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
+	if (IS_GEN7(batch->devid))
+		OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
+	else
+		OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
 	/* interface descriptor address, is relative to the dynamics base address */
 	OUT_BATCH(interface_descriptor);
 }
@@ -302,6 +297,8 @@ gen7_emit_media_objects(struct intel_batchbuffer *batch,
 			/* inline data (xoffset, yoffset) */
 			OUT_BATCH(x + i * 16);
 			OUT_BATCH(y + j * 16);
+			if (AT_LEAST_GEN(batch->devid, 8) && !IS_CHERRYVIEW(batch->devid))
+				gen8_emit_media_state_flush(batch);
 		}
 	}
 }
@@ -363,33 +360,6 @@ gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
 	OUT_BATCH(0xffffffff);
 }
 
-void
-gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
-{
-	int ret;
-
-	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
-	if (ret == 0)
-		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
-					NULL, 0, 0, 0);
-	igt_assert(ret == 0);
-}
-
-
-uint32_t
-gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			uint8_t color)
-{
-	uint8_t *curbe_buffer;
-	uint32_t offset;
-
-	curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
-	offset = batch_offset(batch, curbe_buffer);
-	*curbe_buffer = color;
-
-	return offset;
-}
-
 uint32_t
 gen8_fill_surface_state(struct intel_batchbuffer *batch,
 			struct igt_buf *buf,
@@ -441,21 +411,6 @@ gen8_fill_surface_state(struct intel_batchbuffer *batch,
 	return offset;
 }
 
-uint32_t
-gen8_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = batch_alloc(batch, 32, 64);
-	offset = batch_offset(batch, binding_table);
-
-	binding_table[0] = gen8_fill_surface_state(batch, dst,
-						GEN8_SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
 uint32_t
 gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,  const uint32_t kernel[][4], size_t size)
 {
@@ -463,8 +418,8 @@ gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *
 	uint32_t offset;
 	uint32_t binding_table_offset, kernel_offset;
 
-	binding_table_offset = gen8_fill_binding_table(batch, dst);
-	kernel_offset = gen8_fill_media_kernel(batch, kernel, size);
+	binding_table_offset = gen7_fill_binding_table(batch, dst);
+	kernel_offset = gen7_fill_kernel(batch, kernel, size);
 
 	idd = batch_alloc(batch, sizeof(*idd), 64);
 	offset = batch_offset(batch, idd);
@@ -522,10 +477,17 @@ gen8_emit_state_base_address(struct intel_batchbuffer *batch)
 	OUT_BATCH(1 << 12 | 1);
 }
 
+void
+gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_MEDIA_STATE_FLUSH | (2 - 2));
+	OUT_BATCH(0);
+}
+
 void
 gen8_emit_vfe_state(struct intel_batchbuffer *batch)
 {
-	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
+	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (9 - 2));
 
 	/* scratch buffer */
 	OUT_BATCH(0);
@@ -550,7 +512,7 @@ gen8_emit_vfe_state(struct intel_batchbuffer *batch)
 void
 gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
 {
-	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
+	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (9 - 2));
 
 	/* scratch buffer */
 	OUT_BATCH(0);
@@ -570,92 +532,6 @@ gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
 	OUT_BATCH(0);
 }
 
-void
-gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
-{
-	OUT_BATCH(GEN8_MEDIA_CURBE_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* curbe total data length */
-	OUT_BATCH(64);
-	/* curbe data start address, is relative to the dynamics base address */
-	OUT_BATCH(curbe_buffer);
-}
-
-void
-gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base address */
-	OUT_BATCH(interface_descriptor);
-}
-
-void
-gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_STATE_FLUSH | (2 - 2));
-	OUT_BATCH(0);
-}
-
-void
-gen8_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height)
-{
-	int i, j;
-
-	for (i = 0; i < width / 16; i++) {
-		for (j = 0; j < height / 16; j++) {
-			OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
-
-			/* interface descriptor offset */
-			OUT_BATCH(0);
-
-			/* without indirect data */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* scoreboard */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* inline data (xoffset, yoffset) */
-			OUT_BATCH(x + i * 16);
-			OUT_BATCH(y + j * 16);
-			gen8_emit_media_state_flush(batch);
-		}
-	}
-}
-void
-gen8lp_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height)
-{
-	int i, j;
-
-	for (i = 0; i < width / 16; i++) {
-		for (j = 0; j < height / 16; j++) {
-			OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
-
-			/* interface descriptor offset */
-			OUT_BATCH(0);
-
-			/* without indirect data */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* scoreboard */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* inline data (xoffset, yoffset) */
-			OUT_BATCH(x + i * 16);
-			OUT_BATCH(y + j * 16);
-		}
-	}
-}
 void
 gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
 		     unsigned x, unsigned y,
diff --git a/lib/gpu_fill.h b/lib/gpu_fill.h
index b20430b1..c7cb4078 100644
--- a/lib/gpu_fill.h
+++ b/lib/gpu_fill.h
@@ -69,12 +69,7 @@ gen7_fill_binding_table(struct intel_batchbuffer *batch,
 			struct igt_buf *dst);
 
 uint32_t
-gen7_fill_media_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size);
-
-uint32_t
-gen8_fill_media_kernel(struct intel_batchbuffer *batch,
+gen7_fill_kernel(struct intel_batchbuffer *batch,
 		const uint32_t kernel[][4],
 		size_t size);
 
@@ -107,53 +102,26 @@ gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
 		     unsigned x, unsigned y,
 		     unsigned width, unsigned height);
 
-void
-gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end);
-
-uint32_t
-gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			uint8_t color);
-
 uint32_t
 gen8_fill_surface_state(struct intel_batchbuffer *batch,
 			struct igt_buf *buf,
 			uint32_t format,
 			int is_dst);
 
-uint32_t
-gen8_fill_binding_table(struct intel_batchbuffer *batch,
-			struct igt_buf *dst);
-
 uint32_t
 gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,  const uint32_t kernel[][4], size_t size);
 
 void
 gen8_emit_state_base_address(struct intel_batchbuffer *batch);
 
-void
-gen8_emit_vfe_state(struct intel_batchbuffer *batch);
-
-void
-gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
-
-void
-gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer);
-
-void
-gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor);
-
 void
 gen8_emit_media_state_flush(struct intel_batchbuffer *batch);
 
 void
-gen8_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height);
+gen8_emit_vfe_state(struct intel_batchbuffer *batch);
 
 void
-gen8lp_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height);
+gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
 
 void
 gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index 7c04ccf3..10d4dce8 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -796,12 +796,10 @@ igt_fillfunc_t igt_get_media_fillfunc(int devid)
 
 	if (IS_GEN9(devid))
 		fill = gen9_media_fillfunc;
-	else if (IS_BROADWELL(devid))
+	else if (IS_GEN8(devid))
 		fill = gen8_media_fillfunc;
 	else if (IS_GEN7(devid))
 		fill = gen7_media_fillfunc;
-	else if (IS_CHERRYVIEW(devid))
-		fill = gen8lp_media_fillfunc;
 
 	return fill;
 }
diff --git a/lib/media_fill.h b/lib/media_fill.h
index 226489cb..161af8cf 100644
--- a/lib/media_fill.h
+++ b/lib/media_fill.h
@@ -18,13 +18,6 @@ gen7_media_fillfunc(struct intel_batchbuffer *batch,
                 unsigned width, unsigned height,
                 uint8_t color);
 
-void
-gen8lp_media_fillfunc(struct intel_batchbuffer *batch,
-		struct igt_buf *dst,
-		unsigned x, unsigned y,
-		unsigned width, unsigned height,
-		uint8_t color);
-
 void
 gen9_media_fillfunc(struct intel_batchbuffer *batch,
                 struct igt_buf *dst,
diff --git a/lib/media_fill_gen8.c b/lib/media_fill_gen8.c
index 4270997e..362abd61 100644
--- a/lib/media_fill_gen8.c
+++ b/lib/media_fill_gen8.c
@@ -62,7 +62,7 @@ gen8_media_fillfunc(struct intel_batchbuffer *batch,
 	/* setup states */
 	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
 
-	curbe_buffer = gen8_fill_curbe_buffer_data(batch, color);
+	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
 	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
@@ -73,17 +73,17 @@ gen8_media_fillfunc(struct intel_batchbuffer *batch,
 
 	gen8_emit_vfe_state(batch);
 
-	gen8_emit_curbe_load(batch, curbe_buffer);
+	gen7_emit_curbe_load(batch, curbe_buffer);
 
-	gen8_emit_interface_descriptor_load(batch, interface_descriptor);
+	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
 
-	gen8_emit_media_objects(batch, x, y, width, height);
+	gen7_emit_media_objects(batch, x, y, width, height);
 
 	OUT_BATCH(MI_BATCH_BUFFER_END);
 
 	batch_end = batch_align(batch, 8);
 	igt_assert(batch_end < BATCH_STATE_SPLIT);
 
-	gen8_render_flush(batch, batch_end);
+	gen7_render_flush(batch, batch_end);
 	intel_batchbuffer_reset(batch);
 }
diff --git a/lib/media_fill_gen8lp.c b/lib/media_fill_gen8lp.c
deleted file mode 100644
index dcc11982..00000000
--- a/lib/media_fill_gen8lp.c
+++ /dev/null
@@ -1,87 +0,0 @@
-#include <intel_bufmgr.h>
-#include <i915_drm.h>
-
-#include "media_fill.h"
-#include "gen8_media.h"
-#include "intel_reg.h"
-#include "drmtest.h"
-#include "gpu_fill.h"
-#include <assert.h>
-
-
-static const uint32_t media_kernel[][4] = {
-	{ 0x00400001, 0x20202288, 0x00000020, 0x00000000 },
-	{ 0x00600001, 0x20800208, 0x008d0000, 0x00000000 },
-	{ 0x00200001, 0x20800208, 0x00450040, 0x00000000 },
-	{ 0x00000001, 0x20880608, 0x00000000, 0x000f000f },
-	{ 0x00800001, 0x20a00208, 0x00000020, 0x00000000 },
-	{ 0x00800001, 0x20e00208, 0x00000020, 0x00000000 },
-	{ 0x00800001, 0x21200208, 0x00000020, 0x00000000 },
-	{ 0x00800001, 0x21600208, 0x00000020, 0x00000000 },
-	{ 0x0c800031, 0x24000a40, 0x0e000080, 0x120a8000 },
-	{ 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 },
-	{ 0x07800031, 0x20000a40, 0x0e000e00, 0x82000010 },
-};
-
-/*
- * This sets up the media pipeline,
- *
- * +---------------+ <---- 4096
- * |       ^       |
- * |       |       |
- * |    various    |
- * |      state    |
- * |       |       |
- * |_______|_______| <---- 2048 + ?
- * |       ^       |
- * |       |       |
- * |   batch       |
- * |    commands   |
- * |       |       |
- * |       |       |
- * +---------------+ <---- 0 + ?
- *
- */
-
-#define BATCH_STATE_SPLIT 2048
-
-void
-gen8lp_media_fillfunc(struct intel_batchbuffer *batch,
-		struct igt_buf *dst,
-		unsigned x, unsigned y,
-		unsigned width, unsigned height,
-		uint8_t color)
-{
-	uint32_t curbe_buffer, interface_descriptor;
-	uint32_t batch_end;
-
-	intel_batchbuffer_flush(batch);
-
-	/* setup states */
-	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
-
-	curbe_buffer = gen8_fill_curbe_buffer_data(batch, color);
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
-	igt_assert(batch->ptr < &batch->buffer[4095]);
-
-	/* media pipeline */
-	batch->ptr = batch->buffer;
-	OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
-	gen8_emit_state_base_address(batch);
-
-	gen8_emit_vfe_state(batch);
-
-	gen8_emit_curbe_load(batch, curbe_buffer);
-
-	gen8_emit_interface_descriptor_load(batch, interface_descriptor);
-
-	gen8lp_emit_media_objects(batch, x, y, width, height);
-
-	OUT_BATCH(MI_BATCH_BUFFER_END);
-
-	batch_end = batch_align(batch, 8);
-	igt_assert(batch_end < BATCH_STATE_SPLIT);
-
-	gen8_render_flush(batch, batch_end);
-	intel_batchbuffer_reset(batch);
-}
diff --git a/lib/media_fill_gen9.c b/lib/media_fill_gen9.c
index 6accdbe4..d1335fe6 100644
--- a/lib/media_fill_gen9.c
+++ b/lib/media_fill_gen9.c
@@ -59,7 +59,7 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
 	/* setup states */
 	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
 
-	curbe_buffer = gen8_fill_curbe_buffer_data(batch, color);
+	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
 	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
 	assert(batch->ptr < &batch->buffer[4095]);
 
@@ -75,11 +75,11 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
 
 	gen8_emit_vfe_state(batch);
 
-	gen8_emit_curbe_load(batch, curbe_buffer);
+	gen7_emit_curbe_load(batch, curbe_buffer);
 
-	gen8_emit_interface_descriptor_load(batch, interface_descriptor);
+	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
 
-	gen8_emit_media_objects(batch, x, y, width, height);
+	gen7_emit_media_objects(batch, x, y, width, height);
 
 	OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
 			GEN9_FORCE_MEDIA_AWAKE_DISABLE |
@@ -93,6 +93,6 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
 	batch_end = batch_align(batch, 8);
 	assert(batch_end < BATCH_STATE_SPLIT);
 
-	gen8_render_flush(batch, batch_end);
+	gen7_render_flush(batch, batch_end);
 	intel_batchbuffer_reset(batch);
 }
diff --git a/lib/meson.build b/lib/meson.build
index 385e08b9..5f2567fb 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -26,7 +26,6 @@ lib_sources = [
 	'ioctl_wrappers.c',
 	'media_fill_gen7.c',
 	'media_fill_gen8.c',
-	'media_fill_gen8lp.c',
 	'media_fill_gen9.c',
 	'media_spin.c',
 	'gpgpu_fill.c',
-- 
2.14.3

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [igt-dev] [PATCH i-g-t v6 3/4] lib/gpgpu_fill: Add missing configuration parameters for gpgpu_fill
  2018-04-10 11:28 ` [igt-dev] [PATCH i-g-t v2] lib/gen6_render: Refactoring lib Lukasz Kalamarz
                     ` (2 preceding siblings ...)
  2018-04-10 14:19   ` [igt-dev] [PATCH i-g-t v6 2/4] lib: Remove duplications in " Katarzyna Dec
@ 2018-04-10 14:19   ` Katarzyna Dec
  2018-04-10 14:19   ` [igt-dev] [PATCH i-g-t v6 4/4] lib: Adjust refactored gpu_fill library to our coding style Katarzyna Dec
  4 siblings, 0 replies; 28+ messages in thread
From: Katarzyna Dec @ 2018-04-10 14:19 UTC (permalink / raw)
  To: igt-dev

There are missing parameters for Gen8 configuration of gpgpu_fill
that are causing GPU hangs on newer hardware. We need to set the
number of threads in TG in gen8_fill_interface_descriptor. This
field was omitted (apparently without any side effects), but
according to bspec from BDW this field cannot be set to 0. We also
need to use the pipeline selection mask in gen9_gpgpu_fillfunc, which
is necessary from SKL.

v2: rebased on refactored library
v3: Removed replacing gen7_emit_interface_descriptor_load with gen8
version in gen9_gpgpu_fillfunc, because during refactoring gen8
function was removed.
v4: rebase on series new version

Signed-off-by: Katarzyna Dec <katarzyna.dec@intel.com>
Cc: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 lib/gpgpu_fill.c | 3 ++-
 lib/gpu_fill.c   | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
index 579ce78d..5a77ebd4 100644
--- a/lib/gpgpu_fill.c
+++ b/lib/gpgpu_fill.c
@@ -223,7 +223,8 @@ gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 	batch->ptr = batch->buffer;
 
 	/* GPGPU pipeline */
-	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU);
+	OUT_BATCH(GEN7_PIPELINE_SELECT | GEN9_PIPELINE_SELECTION_MASK |
+		  PIPELINE_SELECT_GPGPU);
 
 	gen9_emit_state_base_address(batch);
 	gen8_emit_vfe_state_gpgpu(batch);
diff --git a/lib/gpu_fill.c b/lib/gpu_fill.c
index 7d99dfd9..102f141b 100644
--- a/lib/gpu_fill.c
+++ b/lib/gpu_fill.c
@@ -438,6 +438,8 @@ gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *
 	idd->desc5.constant_urb_entry_read_offset = 0;
 	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
 
+	idd->desc6.num_threads_in_tg = 1;
+
 	return offset;
 }
 
-- 
2.14.3

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [igt-dev] [PATCH i-g-t v6 4/4] lib: Adjust refactored gpu_fill library to our coding style
  2018-04-10 11:28 ` [igt-dev] [PATCH i-g-t v2] lib/gen6_render: Refactoring lib Lukasz Kalamarz
                     ` (3 preceding siblings ...)
  2018-04-10 14:19   ` [igt-dev] [PATCH i-g-t v6 3/4] lib/gpgpu_fill: Add missing configuration parameters for gpgpu_fill Katarzyna Dec
@ 2018-04-10 14:19   ` Katarzyna Dec
  4 siblings, 0 replies; 28+ messages in thread
From: Katarzyna Dec @ 2018-04-10 14:19 UTC (permalink / raw)
  To: igt-dev

While I am making changes in the gpgpu and media fill area, let's
adjust the code to our coding style.

v2: rebased on the new version of the series (this patch is now the
last in the series, so the change appears larger)
v3: rebased

Signed-off-by: Katarzyna Dec <katarzyna.dec@intel.com>
Cc: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 lib/gpgpu_fill.c      | 24 ++++++++++++------------
 lib/gpgpu_fill.h      | 12 ++++++------
 lib/media_fill.h      | 20 ++++++++++----------
 lib/media_fill_gen7.c |  7 +++----
 lib/media_fill_gen8.c |  7 ++++---
 lib/media_fill_gen9.c |  7 ++++---
 6 files changed, 39 insertions(+), 38 deletions(-)

diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
index 5a77ebd4..72a1445a 100644
--- a/lib/gpgpu_fill.c
+++ b/lib/gpgpu_fill.c
@@ -99,8 +99,8 @@ static const uint32_t gen9_gpgpu_kernel[][4] = {
 void
 gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 		    struct igt_buf *dst,
-		    unsigned x, unsigned y,
-		    unsigned width, unsigned height,
+		    unsigned int x, unsigned int y,
+		    unsigned int width, unsigned int height,
 		    uint8_t color)
 {
 	uint32_t curbe_buffer, interface_descriptor;
@@ -120,8 +120,8 @@ gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
 
 	interface_descriptor = gen7_fill_interface_descriptor(batch, dst,
-							      gen7_gpgpu_kernel,
-							      sizeof(gen7_gpgpu_kernel));
+				gen7_gpgpu_kernel, sizeof(gen7_gpgpu_kernel));
+
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
 	batch->ptr = batch->buffer;
@@ -147,8 +147,8 @@ gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 void
 gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 		    struct igt_buf *dst,
-		    unsigned x, unsigned y,
-		    unsigned width, unsigned height,
+		    unsigned int x, unsigned int y,
+		    unsigned int width, unsigned int height,
 		    uint8_t color)
 {
 	uint32_t curbe_buffer, interface_descriptor;
@@ -168,8 +168,8 @@ gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
 
 	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
-							      gen8_gpgpu_kernel,
-							      sizeof(gen8_gpgpu_kernel));
+				gen8_gpgpu_kernel, sizeof(gen8_gpgpu_kernel));
+
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
 	batch->ptr = batch->buffer;
@@ -195,8 +195,8 @@ gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 void
 gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 		    struct igt_buf *dst,
-		    unsigned x, unsigned y,
-		    unsigned width, unsigned height,
+		    unsigned int x, unsigned int y,
+		    unsigned int width, unsigned int height,
 		    uint8_t color)
 {
 	uint32_t curbe_buffer, interface_descriptor;
@@ -216,8 +216,8 @@ gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
 
 	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
-							      gen9_gpgpu_kernel,
-							      sizeof(gen9_gpgpu_kernel));
+				gen9_gpgpu_kernel, sizeof(gen9_gpgpu_kernel));
+
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
 	batch->ptr = batch->buffer;
diff --git a/lib/gpgpu_fill.h b/lib/gpgpu_fill.h
index 7b5c8322..f0d188ae 100644
--- a/lib/gpgpu_fill.h
+++ b/lib/gpgpu_fill.h
@@ -30,22 +30,22 @@
 void
 gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 		    struct igt_buf *dst,
-		    unsigned x, unsigned y,
-		    unsigned width, unsigned height,
+		    unsigned int x, unsigned int y,
+		    unsigned int width, unsigned int height,
 		    uint8_t color);
 
 void
 gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 		    struct igt_buf *dst,
-		    unsigned x, unsigned y,
-		    unsigned width, unsigned height,
+		    unsigned int x, unsigned int y,
+		    unsigned int width, unsigned int height,
 		    uint8_t color);
 
 void
 gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
 		    struct igt_buf *dst,
-		    unsigned x, unsigned y,
-		    unsigned width, unsigned height,
+		    unsigned int x, unsigned int y,
+		    unsigned int width, unsigned int height,
 		    uint8_t color);
 
 #endif /* GPGPU_FILL_H */
diff --git a/lib/media_fill.h b/lib/media_fill.h
index 161af8cf..f6db734e 100644
--- a/lib/media_fill.h
+++ b/lib/media_fill.h
@@ -7,22 +7,22 @@
 void
 gen8_media_fillfunc(struct intel_batchbuffer *batch,
 		struct igt_buf *dst,
-		unsigned x, unsigned y,
-		unsigned width, unsigned height,
+		unsigned int x, unsigned int y,
+		unsigned int width, unsigned int height,
 		uint8_t color);
 
 void
 gen7_media_fillfunc(struct intel_batchbuffer *batch,
-                struct igt_buf *dst,
-                unsigned x, unsigned y,
-                unsigned width, unsigned height,
-                uint8_t color);
+		struct igt_buf *dst,
+		unsigned int x, unsigned int y,
+		unsigned int width, unsigned int height,
+		uint8_t color);
 
 void
 gen9_media_fillfunc(struct intel_batchbuffer *batch,
-                struct igt_buf *dst,
-                unsigned x, unsigned y,
-                unsigned width, unsigned height,
-                uint8_t color);
+		struct igt_buf *dst,
+		unsigned int x, unsigned int y,
+		unsigned int width, unsigned int height,
+		uint8_t color);
 
 #endif /* RENDE_MEDIA_FILL_H */
diff --git a/lib/media_fill_gen7.c b/lib/media_fill_gen7.c
index c97555a6..5a8c32fb 100644
--- a/lib/media_fill_gen7.c
+++ b/lib/media_fill_gen7.c
@@ -47,8 +47,8 @@ static const uint32_t media_kernel[][4] = {
 void
 gen7_media_fillfunc(struct intel_batchbuffer *batch,
 		struct igt_buf *dst,
-		unsigned x, unsigned y,
-		unsigned width, unsigned height,
+		unsigned int x, unsigned int y,
+		unsigned int width, unsigned int height,
 		uint8_t color)
 {
 	uint32_t curbe_buffer, interface_descriptor;
@@ -61,8 +61,7 @@ gen7_media_fillfunc(struct intel_batchbuffer *batch,
 
 	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
 	interface_descriptor = gen7_fill_interface_descriptor(batch, dst,
-							      media_kernel,
-							      sizeof(media_kernel));
+					media_kernel, sizeof(media_kernel));
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
 	/* media pipeline */
diff --git a/lib/media_fill_gen8.c b/lib/media_fill_gen8.c
index 362abd61..d6dd7410 100644
--- a/lib/media_fill_gen8.c
+++ b/lib/media_fill_gen8.c
@@ -50,8 +50,8 @@ static const uint32_t media_kernel[][4] = {
 void
 gen8_media_fillfunc(struct intel_batchbuffer *batch,
 		struct igt_buf *dst,
-		unsigned x, unsigned y,
-		unsigned width, unsigned height,
+		unsigned int x, unsigned int y,
+		unsigned int width, unsigned int height,
 		uint8_t color)
 {
 	uint32_t curbe_buffer, interface_descriptor;
@@ -63,7 +63,8 @@ gen8_media_fillfunc(struct intel_batchbuffer *batch,
 	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
 
 	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
+	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
+					media_kernel, sizeof(media_kernel));
 	igt_assert(batch->ptr < &batch->buffer[4095]);
 
 	/* media pipeline */
diff --git a/lib/media_fill_gen9.c b/lib/media_fill_gen9.c
index d1335fe6..a9a829f2 100644
--- a/lib/media_fill_gen9.c
+++ b/lib/media_fill_gen9.c
@@ -47,8 +47,8 @@ static const uint32_t media_kernel[][4] = {
 void
 gen9_media_fillfunc(struct intel_batchbuffer *batch,
 		struct igt_buf *dst,
-		unsigned x, unsigned y,
-		unsigned width, unsigned height,
+		unsigned int x, unsigned int y,
+		unsigned int width, unsigned int height,
 		uint8_t color)
 {
 	uint32_t curbe_buffer, interface_descriptor;
@@ -60,7 +60,8 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
 	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
 
 	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, media_kernel, sizeof(media_kernel));
+	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
+					media_kernel, sizeof(media_kernel));
 	assert(batch->ptr < &batch->buffer[4095]);
 
 	/* media pipeline */
-- 
2.14.3

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [igt-dev] ✗ Fi.CI.IGT: failure for lib/gen6_render: Refactoring lib (rev4)
  2018-04-09 15:42 [igt-dev] [PATCH i-g-t] lib/gen6_render: Refactoring lib Lukasz Kalamarz
                   ` (14 preceding siblings ...)
  2018-04-10 13:46 ` [igt-dev] ✓ Fi.CI.BAT: success for lib/gen6_render: Refactoring lib (rev4) Patchwork
@ 2018-04-10 15:04 ` Patchwork
  15 siblings, 0 replies; 28+ messages in thread
From: Patchwork @ 2018-04-10 15:04 UTC (permalink / raw)
  To: Katarzyna Dec; +Cc: igt-dev

== Series Details ==

Series: lib/gen6_render: Refactoring lib (rev4)
URL   : https://patchwork.freedesktop.org/series/41379/
State : failure

== Summary ==

= CI Bug Log - changes from CI_DRM_4040_full -> IGTPW_1240_full =

== Summary - FAILURE ==

  Serious unknown changes coming with IGTPW_1240_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in IGTPW_1240_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce the CI noise.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1240/

== Possible new issues ==

  Here are the unknown changes that may have been introduced in IGTPW_1240_full:

  === IGT changes ===

    ==== Possible regressions ====

    igt@gem_gpgpu_fill:
      shard-apl:          PASS -> FAIL
      shard-kbl:          PASS -> FAIL

    
    ==== Warnings ====

    igt@perf_pmu@rc6:
      shard-kbl:          SKIP -> PASS +1

    
== Known issues ==

  Here are the changes found in IGTPW_1240_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@kms_flip@modeset-vs-vblank-race-interruptible:
      shard-apl:          PASS -> FAIL (fdo#103060)

    
    ==== Possible fixes ====

    igt@kms_cursor_legacy@2x-long-flip-vs-cursor-legacy:
      shard-hsw:          FAIL (fdo#104873) -> PASS

    igt@kms_flip@flip-vs-expired-vblank-interruptible:
      shard-hsw:          FAIL (fdo#105189) -> PASS

    igt@kms_rotation_crc@sprite-rotation-180:
      shard-snb:          FAIL (fdo#103925) -> PASS

    igt@kms_sysfs_edid_timing:
      shard-apl:          WARN (fdo#100047) -> PASS

    
  fdo#100047 https://bugs.freedesktop.org/show_bug.cgi?id=100047
  fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
  fdo#103925 https://bugs.freedesktop.org/show_bug.cgi?id=103925
  fdo#104873 https://bugs.freedesktop.org/show_bug.cgi?id=104873
  fdo#105189 https://bugs.freedesktop.org/show_bug.cgi?id=105189


== Participating hosts (6 -> 4) ==

  Missing    (2): shard-glk shard-glkb 


== Build changes ==

    * IGT: IGT_4418 -> IGTPW_1240

  CI_DRM_4040: 8e7a3b1c5ebd06c5740b0fea76f46ff23d373bd5 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_1240: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1240/
  IGT_4418: 7c474e011548d35df6b80ceed81d3e6ca560c71d @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  piglit_4418: 45e115f293fd6acc0c9647cf2d3b76be78819ba5 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1240/shards.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [igt-dev] [PATCH i-g-t v6 1/4] lib: Move common gpgpu/media fill functions to gpu_fill library
  2018-04-10 14:19   ` [igt-dev] [PATCH i-g-t v6 1/4] lib: Move common gpgpu/media fill functions to gpu_fill library Katarzyna Dec
@ 2018-04-10 16:01     ` Antonio Argenziano
  0 siblings, 0 replies; 28+ messages in thread
From: Antonio Argenziano @ 2018-04-10 16:01 UTC (permalink / raw)
  To: Katarzyna Dec, igt-dev


On 10/04/18 07:19, Katarzyna Dec wrote:
> diff --git a/lib/gpu_fill.c b/lib/gpu_fill.c
> new file mode 100644
> index 00000000..172c6db6
> --- /dev/null
> +++ b/lib/gpu_fill.c
> @@ -0,0 +1,758 @@

This file also needs a copyright header.

Thanks,
Antonio

> +#include "gpu_fill.h"
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2018-04-10 16:01 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-09 15:42 [igt-dev] [PATCH i-g-t] lib/gen6_render: Refactoring lib Lukasz Kalamarz
2018-04-09 16:49 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
2018-04-09 22:07 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
2018-04-10 10:35 ` [igt-dev] [PATCH i-g-t] " Katarzyna Dec
2018-04-10 11:29   ` Kalamarz, Lukasz
2018-04-10 10:48 ` [igt-dev] [PATCH i-g-t v4 0/4] Refactoring of *_fill libraries Katarzyna Dec
2018-04-10 10:48 ` [igt-dev] [PATCH i-g-t v4 1/4] lib: Move common gpgpu/media fill functions to gpu_fill library Katarzyna Dec
2018-04-10 10:48 ` [igt-dev] [PATCH i-g-t v4 2/4] lib: Remove duplications in " Katarzyna Dec
2018-04-10 10:48 ` [igt-dev] [PATCH i-g-t v4 3/4] lib/gpgpu_fill: Add missing configuration parameters for gpgpu_fill Katarzyna Dec
2018-04-10 10:48 ` [igt-dev] [PATCH i-g-t v4 4/4] lib: Adjust refactored gpu_fill library to our coding style Katarzyna Dec
2018-04-10 11:28 ` [igt-dev] [PATCH i-g-t v2] lib/gen6_render: Refactoring lib Lukasz Kalamarz
2018-04-10 13:28   ` Katarzyna Dec
2018-04-10 14:19   ` [igt-dev] [PATCH i-g-t v6 1/4] lib: Move common gpgpu/media fill functions to gpu_fill library Katarzyna Dec
2018-04-10 16:01     ` Antonio Argenziano
2018-04-10 14:19   ` [igt-dev] [PATCH i-g-t v6 2/4] lib: Remove duplications in " Katarzyna Dec
2018-04-10 14:19   ` [igt-dev] [PATCH i-g-t v6 3/4] lib/gpgpu_fill: Add missing configuration parameters for gpgpu_fill Katarzyna Dec
2018-04-10 14:19   ` [igt-dev] [PATCH i-g-t v6 4/4] lib: Adjust refactored gpu_fill library to our coding style Katarzyna Dec
2018-04-10 11:34 ` [igt-dev] [PATCH i-g-t v5 0/4] Refactoring of *_fill libraries Katarzyna Dec
2018-04-10 11:34 ` [igt-dev] [PATCH i-g-t v5 1/4] lib: Move common gpgpu/media fill functions to gpu_fill library Katarzyna Dec
2018-04-10 11:34 ` [igt-dev] [PATCH i-g-t v5 2/4] lib: Remove duplications in " Katarzyna Dec
2018-04-10 13:14   ` Kalamarz, Lukasz
2018-04-10 13:35     ` Katarzyna Dec
2018-04-10 11:34 ` [igt-dev] [PATCH i-g-t v5 3/4] lib/gpgpu_fill: Add missing configuration parameters for gpgpu_fill Katarzyna Dec
2018-04-10 13:16   ` Kalamarz, Lukasz
2018-04-10 11:34 ` [igt-dev] [PATCH i-g-t v5 4/4] lib: Adjust refactored gpu_fill library to our coding style Katarzyna Dec
2018-04-10 13:42   ` Katarzyna Dec
2018-04-10 13:46 ` [igt-dev] ✓ Fi.CI.BAT: success for lib/gen6_render: Refactoring lib (rev4) Patchwork
2018-04-10 15:04 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox