Igt-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [igt-dev] [PATCH] XE2: lib/gpgpu_fill: Port gpgpu_fillfunc
@ 2023-11-02 17:10 Jagmeet Randhawa
  2023-11-02 18:24 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Jagmeet Randhawa @ 2023-11-02 17:10 UTC (permalink / raw)
  Cc: igt-dev

Add xe2lpg_gpgpu_fillfunc to have gpgpu_fill running on XE2.
On XE2 there are a few changes to GPU command instruction lengths.

There's also no 'Media Block Write' message, thus the 'Typed 2D Block
Store' message has to be used in the shader.

The shader was compiled using the following command:

iga64 -p=2 -Wall -Xprint-ldst -Xauto-deps --assemble xe2lpg_gpgpu_kernel.asm
| od -A n -v -t x4 |sed -e 's/ / 0x/g' | sed -e 's/^/\t{/' | sed -e
's/([0-9]|[a-f]|[A-F]) /\1, /g' | sed -e 's/$/ },/g' | sed -e 's/\t /\t/g'

Signed-off-by: Jagmeet Randhawa <jagmeet.randhawa@intel.com>
Signed-off-by: Christoph Manszewski <christoph.manszewski@intel.com>
---
 lib/gpgpu_fill.c                              | 23 ++++++++
 lib/gpgpu_fill.h                              |  6 +++
 lib/gpu_cmds.c                                | 53 ++++++++++++++++---
 .../shaders/gpgpu/xe2lpg_gpgpu_kernel.asm     | 13 +++++
 lib/intel_batchbuffer.c                       |  4 +-
 5 files changed, 90 insertions(+), 9 deletions(-)
 create mode 100644 lib/i915/shaders/gpgpu/xe2lpg_gpgpu_kernel.asm

diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
index eed821872..1270c2b22 100644
--- a/lib/gpgpu_fill.c
+++ b/lib/gpgpu_fill.c
@@ -124,6 +124,18 @@ static const uint32_t xehpc_gpgpu_kernel[][4] = {
 	{ 0x000c0031, 0x00000004, 0x3000500c, 0x00000000 },
 };
 
+static const uint32_t xe2lpg_gpgpu_kernel[][4] = {
+	{ 0x00080061, 0x01050000, 0x00000104, 0x00000000 },
+	{ 0x00000069, 0x02058220, 0x02000014, 0x00000004 },
+	{ 0x00000061, 0x02150220, 0x00000064, 0x00000000 },
+	{ 0x00100061, 0x04054220, 0x00000000, 0x00000000 },
+	{ 0x00041a61, 0x04550220, 0x00220205, 0x00000000 },
+	{ 0x00000061, 0x04754550, 0x00000000, 0x000f000f },
+	{ 0x00101e61, 0x05050220, 0x00000104, 0x00000000 },
+	{ 0x00132031, 0x00000000, 0xd00e0494, 0x04000000 },
+	{ 0x000c0031, 0x00000004, 0x3000500c, 0x00000000 },
+};
+
 /*
  * This sets up the gpgpu pipeline,
  *
@@ -398,3 +410,14 @@ void xehpc_gpgpu_fillfunc(int i915,
 			      xehpc_gpgpu_kernel,
 			      sizeof(xehpc_gpgpu_kernel));
 }
+
+void xe2lpg_gpgpu_fillfunc(int i915,
+			   struct intel_buf *buf,
+			   unsigned int x, unsigned int y,
+			   unsigned int width, unsigned int height,
+			   uint8_t color)
+{
+	__xehp_gpgpu_fillfunc(i915, buf, x, y, width, height, color,
+			      xe2lpg_gpgpu_kernel,
+			      sizeof(xe2lpg_gpgpu_kernel));
+}
diff --git a/lib/gpgpu_fill.h b/lib/gpgpu_fill.h
index f81cd0b53..c3b47c10a 100644
--- a/lib/gpgpu_fill.h
+++ b/lib/gpgpu_fill.h
@@ -75,4 +75,10 @@ xehpc_gpgpu_fillfunc(int i915,
 		     unsigned int width, unsigned int height,
 		     uint8_t color);
 
+void xe2lpg_gpgpu_fillfunc(int i915,
+			   struct intel_buf *buf,
+			   unsigned int x, unsigned int y,
+			   unsigned int width, unsigned int height,
+			   uint8_t color);
+
 #endif /* GPGPU_FILL_H */
diff --git a/lib/gpu_cmds.c b/lib/gpu_cmds.c
index f19f93b28..944d3d6a6 100644
--- a/lib/gpu_cmds.c
+++ b/lib/gpu_cmds.c
@@ -328,7 +328,30 @@ fill_binding_table(struct intel_bb *ibb, struct intel_buf *buf)
 	binding_table = intel_bb_ptr(ibb);
 	intel_bb_ptr_add(ibb, 64);
 
-	if (intel_graphics_ver(devid) >= IP_VER(12, 50))
+	if (intel_graphics_ver(devid) >= IP_VER(20, 0)){
+		/*
+		 * XXX: Up until now, SURFACEFORMAT_R8_UNORM was used regardless of the 'bpp' value.
+		 * For bpp 32 this results in a surface that is 4x narrower than expected. However,
+		 * it worked, because the 'Media Block Read/Write' message assumes the surface width
+		 * is always in units of dwords.
+		 *
+		 * Since Xe2 the 'Media Block Write' message has been replaced with the 'Typed 2D
+		 * Block Load/Store' message, which correctly interprets the surface format.
+		 */
+		if (buf->bpp == 32)
+			binding_table[0] = xehp_fill_surface_state(ibb, buf,
+								      SURFACEFORMAT_R8G8B8A8_UNORM,
+								      1);
+		else if (buf->bpp == 8)
+			binding_table[0] = xehp_fill_surface_state(ibb, buf,
+								      SURFACEFORMAT_R8_UNORM,
+								      1);
+		else
+			igt_assert_f(false,
+				     "Surface state for bpp = %u not implemented",
+				     buf->bpp);
+	}
+	else if (intel_graphics_ver(devid) >= IP_VER(12, 50))
 		binding_table[0] = xehp_fill_surface_state(ibb, buf,
 							   SURFACEFORMAT_R8_UNORM, 1);
 	else if (intel_graphics_ver(devid) >= IP_VER(9, 0))
@@ -959,8 +982,12 @@ xehp_emit_cfe_state(struct intel_bb *ibb, uint32_t threads)
 void
 xehp_emit_state_compute_mode(struct intel_bb *ibb)
 {
-	intel_bb_out(ibb, XEHP_STATE_COMPUTE_MODE);
+	uint32_t dword_length = intel_graphics_ver(ibb->devid) >= IP_VER(20, 0);
+	intel_bb_out(ibb, XEHP_STATE_COMPUTE_MODE | dword_length);
 	intel_bb_out(ibb, 0);
+
+	if (dword_length)
+		intel_bb_out(ibb, 0);
 }
 
 void
@@ -976,6 +1003,8 @@ xehp_emit_state_binding_table_pool_alloc(struct intel_bb *ibb)
 void
 xehp_emit_state_base_address(struct intel_bb *ibb)
 {
+	uint32_t tmp;
+
 	intel_bb_out(ibb, GEN8_STATE_BASE_ADDRESS | 0x14);            //dw0
 
 	/* general */
@@ -983,7 +1012,8 @@ xehp_emit_state_base_address(struct intel_bb *ibb)
 	intel_bb_out(ibb, 0);
 
 	/* stateless data port */
-	intel_bb_out(ibb, 0 | BASE_ADDRESS_MODIFY);                   //dw3
+	tmp = intel_graphics_ver(ibb->devid) == IP_VER(20, 0) ? 0 : BASE_ADDRESS_MODIFY;
+	intel_bb_out(ibb, 0 | tmp);                  //dw3
 
 	/* surface */
 	intel_bb_emit_reloc(ibb, ibb->handle, I915_GEM_DOMAIN_SAMPLER, //dw4-dw5
@@ -1008,7 +1038,10 @@ xehp_emit_state_base_address(struct intel_bb *ibb)
 	/* dynamic state buffer size */
 	intel_bb_out(ibb, 1 << 12 | 1);                             //dw13
 	/* indirect object buffer size */
-	intel_bb_out(ibb, 0xfffff000 | 1);                          //dw14
+	if (intel_graphics_ver(ibb->devid) == IP_VER(20, 0))	    //dw14
+		intel_bb_out(ibb, 0);
+	else
+		intel_bb_out(ibb, 0xfffff000 | 1);
 	/* intruction buffer size */
 	intel_bb_out(ibb, 1 << 12 | 1);                             //dw15
 
@@ -1030,7 +1063,7 @@ xehp_emit_compute_walk(struct intel_bb *ibb,
 		       struct xehp_interface_descriptor_data *pidd,
 		       uint8_t color)
 {
-	uint32_t x_dim, y_dim, mask;
+	uint32_t x_dim, y_dim, mask, dword_length;
 
 	/*
 	 * Simply do SIMD16 based dispatch, so every thread uses
@@ -1051,8 +1084,9 @@ xehp_emit_compute_walk(struct intel_bb *ibb,
 		mask = (1 << 16) - 1;
 	else
 		mask = (1 << mask) - 1;
-
-	intel_bb_out(ibb, XEHP_COMPUTE_WALKER | 0x25);
+	
+	dword_length = intel_graphics_ver(ibb->devid) >= IP_VER(20, 0) ? 0x26 : 0x25;
+	intel_bb_out(ibb, XEHP_COMPUTE_WALKER | dword_length);
 
 	intel_bb_out(ibb, 0); /* debug object */		//dw1
 	intel_bb_out(ibb, 0); /* indirect data length */	//dw2
@@ -1090,9 +1124,12 @@ xehp_emit_compute_walk(struct intel_bb *ibb,
 	intel_bb_out(ibb, 0);					//dw15
 	intel_bb_out(ibb, 0);					//dw16
 	intel_bb_out(ibb, 0);					//dw17
+	
+	if (intel_graphics_ver(ibb->devid) >= IP_VER(20, 0))	//XE2:dw18
+		intel_bb_out(ibb, 0);
 
 	/* Interface descriptor data */
-	for (int i = 0; i < 8; i++) {			       //dw18-25
+	for (int i = 0; i < 8; i++) {			       //dw18-25 (XE2:dw19-26)
 		intel_bb_out(ibb, ((uint32_t *) pidd)[i]);
 	}
 
diff --git a/lib/i915/shaders/gpgpu/xe2lpg_gpgpu_kernel.asm b/lib/i915/shaders/gpgpu/xe2lpg_gpgpu_kernel.asm
new file mode 100644
index 000000000..e2ecc71f5
--- /dev/null
+++ b/lib/i915/shaders/gpgpu/xe2lpg_gpgpu_kernel.asm
@@ -0,0 +1,13 @@
+L0:
+         mov (4|M0)               r1.0<1>:ub    r1.0<0;1,0>:ub                        // Load r1.0-3 with color byte
+         shl (1|M0)               r2.0<1>:ud    r0.1<0;1,0>:ud    0x4:ud              // Load r2.0-3 with tg id X << 4
+         mov (1|M0)               r2.1<1>:ud    r0.6<0;1,0>:ud                        // Load r2.4-7 with tg id Y
+
+         // payload setup
+         mov (16|M0)              r4.0<1>:ud    0x0:ud                                // Zero out register R4
+         mov (2|M0)               r4.5<1>:ud    r2.0<2;2,1>:ud                        // Store X and Y block start (160:191 and 192:223)
+         mov (1|M0)               r4.14<1>:w    0xF:w                                 // Store X and Y block size (224:231 and 232:239)
+         mov (16|M0)              r5.0<1>:ud    r1.0<0;1,0>:ud                        // Load r5-r6 with color byte
+
+         send.tgm (16|M0)         null     r4    null:0    0x0    0x64000007          // Send TypedStore2DBlock to tgm port
+         send.gtwy (8|M0)         null    r80    null:0    0x0    0x02000000 {EOT}
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index df82ef5f5..d23c04073 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -755,7 +755,9 @@ igt_fillfunc_t igt_get_gpgpu_fillfunc(int devid)
 {
 	igt_fillfunc_t fill = NULL;
 
-	if (IS_METEORLAKE(devid))
+	if (intel_graphics_ver(devid) >= IP_VER(20, 0))
+                fill = xe2lpg_gpgpu_fillfunc;
+	else if (IS_METEORLAKE(devid))
 		fill = xehp_gpgpu_fillfunc;
 	else if (intel_graphics_ver(devid) >= IP_VER(12, 60))
 		fill = xehpc_gpgpu_fillfunc;
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-11-03 16:15 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-11-02 17:10 [igt-dev] [PATCH] XE2: lib/gpgpu_fill: Port gpgpu_fillfunc Jagmeet Randhawa
2023-11-02 18:24 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
2023-11-02 18:47 ` [igt-dev] ✓ CI.xeBAT: " Patchwork
2023-11-02 19:28 ` [igt-dev] [PATCH] " Grzegorzek, Dominik
2023-11-03 10:00 ` Manszewski, Christoph
2023-11-03 16:15 ` [igt-dev] ✗ Fi.CI.IGT: failure for " Patchwork

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox